[PATCH] drm/msm/dp: Add missing drm_device backpointer
'6cba3fe43341 ("drm/dp: Add backpointer to drm_device in drm_dp_aux")' introduced a mandatory drm_device backpointer in struct drm_dp_aux, but missed the msm DP driver. Fix this. Fixes: 6cba3fe43341 ("drm/dp: Add backpointer to drm_device in drm_dp_aux") Signed-off-by: Bjorn Andersson --- drivers/gpu/drm/msm/dp/dp_aux.c | 3 ++- drivers/gpu/drm/msm/dp/dp_aux.h | 2 +- drivers/gpu/drm/msm/dp/dp_display.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index 4a3293b590b0..88659ed200b9 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -441,7 +441,7 @@ void dp_aux_deinit(struct drm_dp_aux *dp_aux) dp_catalog_aux_enable(aux->catalog, false); } -int dp_aux_register(struct drm_dp_aux *dp_aux) +int dp_aux_register(struct drm_dp_aux *dp_aux, struct drm_device *drm_dev) { struct dp_aux_private *aux; int ret; @@ -455,6 +455,7 @@ int dp_aux_register(struct drm_dp_aux *dp_aux) aux->dp_aux.name = "dpu_dp_aux"; aux->dp_aux.dev = aux->dev; + aux->dp_aux.drm_dev = drm_dev; aux->dp_aux.transfer = dp_aux_transfer; ret = drm_dp_aux_register(&aux->dp_aux); if (ret) { diff --git a/drivers/gpu/drm/msm/dp/dp_aux.h b/drivers/gpu/drm/msm/dp/dp_aux.h index 0728cc09c9ec..7ef0d83b483a 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.h +++ b/drivers/gpu/drm/msm/dp/dp_aux.h @@ -9,7 +9,7 @@ #include "dp_catalog.h" #include -int dp_aux_register(struct drm_dp_aux *dp_aux); +int dp_aux_register(struct drm_dp_aux *dp_aux, struct drm_device *drm_dev); void dp_aux_unregister(struct drm_dp_aux *dp_aux); void dp_aux_isr(struct drm_dp_aux *dp_aux); void dp_aux_init(struct drm_dp_aux *dp_aux); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index c26562bd85fe..2f0a5c13f251 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -259,7 +259,7 @@ static int dp_display_bind(struct device *dev, struct device *master, 
return rc; } - rc = dp_aux_register(dp->aux); + rc = dp_aux_register(dp->aux, drm); if (rc) { DRM_ERROR("DRM DP AUX register failed\n"); return rc; -- 2.29.2
Re: [PATCH 11/17] drm/msm/dpu: drop src_split and multirect check from dpu_crtc_atomic_check
Hi Dmitry, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on next-20210624] [also build test WARNING on v5.13-rc7] [cannot apply to linus/master v5.13-rc7 v5.13-rc6 v5.13-rc5] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Dmitry-Baryshkov/drm-msm-dpu-switch-dpu_plane-to-be-virtual/20210624-225947 base:2a8927f0efb6fb34b9d11dab3bd3f018e866d36d config: arm-defconfig (attached as .config) compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/3842e184f54916b9d22989d840a70bfb0bfebf10 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Dmitry-Baryshkov/drm-msm-dpu-switch-dpu_plane-to-be-virtual/20210624-225947 git checkout 3842e184f54916b9d22989d840a70bfb0bfebf10 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All warnings (new ones prefixed by >>): drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c: In function 'dpu_crtc_atomic_check': >> drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c:898:23: warning: variable >> 'mixer_width' set but not used [-Wunused-but-set-variable] 898 | int cnt = 0, rc = 0, mixer_width = 0, i; | ^~~ vim +/mixer_width +898 drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c 884 885 static int dpu_crtc_atomic_check(struct drm_crtc *crtc, 886 struct drm_atomic_state *state) 887 { 888 struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, 889 crtc); 890 struct dpu_crtc *dpu_crtc = to_dpu_crtc(crtc); 891 struct dpu_crtc_state *cstate = 
to_dpu_crtc_state(crtc_state); 892 struct plane_state *pstates; 893 894 const struct drm_plane_state *pstate; 895 struct drm_plane *plane; 896 struct drm_display_mode *mode; 897 > 898 int cnt = 0, rc = 0, mixer_width = 0, i; 899 900 struct drm_rect crtc_rect = { 0 }; 901 902 pstates = kzalloc(sizeof(*pstates) * DPU_STAGE_MAX * 4, GFP_KERNEL); 903 904 if (!crtc_state->enable || !crtc_state->active) { 905 DRM_DEBUG_ATOMIC("crtc%d -> enable %d, active %d, skip atomic_check\n", 906 crtc->base.id, crtc_state->enable, 907 crtc_state->active); 908 memset(&cstate->new_perf, 0, sizeof(cstate->new_perf)); 909 goto end; 910 } 911 912 mode = &crtc_state->adjusted_mode; 913 DRM_DEBUG_ATOMIC("%s: check\n", dpu_crtc->name); 914 915 /* force a full mode set if active state changed */ 916 if (crtc_state->active_changed) 917 crtc_state->mode_changed = true; 918 919 if (cstate->num_mixers) { 920 mixer_width = mode->hdisplay / cstate->num_mixers; 921 922 _dpu_crtc_setup_lm_bounds(crtc, crtc_state); 923 } 924 925 crtc_rect.x2 = mode->hdisplay; 926 crtc_rect.y2 = mode->vdisplay; 927 928 /* get plane state for all drm planes associated with crtc state */ 929 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { 930 struct drm_rect dst, clip = crtc_rect; 931 932 if (IS_ERR_OR_NULL(pstate)) { 933 rc = PTR_ERR(pstate); 934 DPU_ERROR("%s: failed to get plane%d state, %d\n", 935 dpu_crtc->name, plane->base.id, rc); 936 goto end; 937 } 938 if (cnt >= DPU_STAGE_MAX * 4) 939 continue; 940 941 pstates[cnt].dpu_pstate = to_dpu_plane_state(pstate); 942 pstates[cnt].drm_pstate = pstate; 943 pstates[cnt].stage = pstate->normalized_zpos; 944 945 dpu_plane_clear_multirect(pstate); 946 947 cnt++; 948 949 dst = d
Re: [PATCH v5 3/5] drm/msm: Improve the a6xx page fault handler
On Thu 10 Jun 16:44 CDT 2021, Rob Clark wrote: [..] > diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c > index 50d881794758..6975b95c3c29 100644 > --- a/drivers/gpu/drm/msm/msm_iommu.c > +++ b/drivers/gpu/drm/msm/msm_iommu.c > @@ -211,8 +211,17 @@ static int msm_fault_handler(struct iommu_domain > *domain, struct device *dev, > unsigned long iova, int flags, void *arg) > { > struct msm_iommu *iommu = arg; > + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(iommu->base.dev); > + struct adreno_smmu_fault_info info, *ptr = NULL; > + > + if (adreno_smmu->get_fault_info) { This seemed reasonable when I read it last time, but I didn't realize that the msm_fault_handler() is installed for all msm_iommu instances. So while we're trying to recover from the boot splash and setup the new framebuffer we end up here with iommu->base.dev being the mdss device. Naturally drvdata of mdss is not a struct adreno_smmu_priv. > + adreno_smmu->get_fault_info(adreno_smmu->cookie, &info); So here we just jump straight out into hyperspace, never to return. Not sure how to wire this up to avoid the problem, but right now I don't think we can boot any device with a boot splash. Regards, Bjorn > + ptr = &info; > + } > + > if (iommu->base.handler) > - return iommu->base.handler(iommu->base.arg, iova, flags); > + return iommu->base.handler(iommu->base.arg, iova, flags, ptr); > + > pr_warn_ratelimited("*** fault: iova=%16lx, flags=%d\n", iova, flags); > return 0; > }
[PATCH] drm/amdgpu: use kvcalloc for entry->entries.
kvmalloc_array + __GFP_ZERO is the same as kvcalloc. Signed-off-by: huqiqiao --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9acee4a5b2ba..5a012321d09e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -909,8 +909,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, num_entries = amdgpu_vm_num_entries(adev, cursor->level); entry->entries = kvmalloc_array(num_entries, - sizeof(*entry->entries), - GFP_KERNEL | __GFP_ZERO); + sizeof(*entry->entries), GFP_KERNEL); if (!entry->entries) return -ENOMEM; } -- 2.11.0
[PATCH] drm/amdgpu: use kvcalloc for entry->entries
kvmalloc_array + __GFP_ZERO is the same as kvcalloc. Signed-off-by: huqiqiao --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b8c31e3469c0..5cadfadc625f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -909,7 +909,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, num_entries = amdgpu_vm_num_entries(adev, cursor->level); entry->entries = kvcalloc(num_entries, - sizeof(*entry->entries), GFP_KERNEL); + sizeof(*entry->entries), GFP_KERNEL); if (!entry->entries) return -ENOMEM; } -- 2.11.0
Re: [PATCH 0/4] [RFC] Support virtual DRM
On 2021/06/23 20:41, Pekka Paalanen wrote: > On Wed, 23 Jun 2021 18:22:47 +0900 > Esaki Tomohito wrote: > >> On 2021/06/23 17:39, Pekka Paalanen wrote: >>> On Wed, 23 Jun 2021 15:56:05 +0900 >>> Esaki Tomohito wrote: >>> Hi, Thank you all for your comments. On 2021/06/22 17:12, Pekka Paalanen wrote: > On Tue, 22 Jun 2021 13:03:39 +0900 > Esaki Tomohito wrote: > >> Hi, Enrico Weigelt >> Thank you for reply. >> >> On 2021/06/22 1:05, Enrico Weigelt, metux IT consult wrote: >>> On 21.06.21 08:27, Tomohito Esaki wrote: >>> >>> Hi, >>> Virtual DRM splits the overlay planes of a display controller into multiple virtual devices to allow each plane to be accessed by each process. This makes it possible to overlay images output from multiple processes on a display. For example, one process displays the camera image without compositor while another process overlays the UI. >>> >>> Are you attempting to create an simple in-kernel compositor ? >> >> I think the basic idea is the same as DRMlease. > > Hi, > > indeed. Why not use DRM leases instead? > In this use case, I understand that this is not possible with DRM lease, am I wrong? I understand that it’s not possible to lease a plane and update planes on the same output independently from different processes in current DRM lease. If this is correct, what do you think of adding support for plane leases to the DRM lease to handle this case? >>> >>> Hi, >>> >>> I would love to see support added for leasing individual planes, >>> especially to replace the virtual DRM proposal which seems to be >>> eradicating everything that atomic modesetting and nuclear pageflip >>> have built over the many years. >>> >>> However, please note that "on the same output independently" is >>> physically impossible. Semantically, the planes define what a CRTC >>> scans out, and the CRTC defines the scanout timings. Therefore it is not >>> possible to update individual planes independently, they will all >>> always share the timings of the CRTC. 
>>> >>> That combined with KMS not allowing multiple updates to be queued at >>> the same time for the same CRTC (atomic commits and legacy pageflips >>> returning EBUSY) makes the plane updates very much inter-dependent. >>> >>> If you want to avoid EBUSY and have planes update on the vblank you >>> intended, you really need a userspace compositor to pull everything >>> together *before* submitting anything to the kernel. >> >> Hi, >> >> Thank you for your comments and advice. >> I will consider leasing a plane. > > Hi, > > I wish you considered a userspace compositor first, once more, with > passion. > > It does not need to be Weston, and it does not need to use Wayland. > Just a userspace daemon that owns the whole display device and somehow > talks to whatever else wants stuff on screen. > > I have not seen any evidence that leasing individual planes would do > you any good. I can easily see it doing you harm. I'm only saying that > it would be better than the virtual DRM proposal if you absolutely have > to go there. Please, consider not going there at all. > > "On the same output independently" is not possible for the very simple > reason that the pixel data needs to be streamed serially to a monitor. > Hi, Thank you for your advice. Once again, I'll consider a userspace compositor first. Best regards Esaki
Re:Re: [PATCH] drm/amdgpu:use kvcalloc instead of kvmalloc_array
OK, I'll revise it and submit it again. George. From: "Christian König "To: "huqiqiao ","airlied ","daniel "CC: "dri-devel ","amd-gfx ","linux-kernel "Sent: 2021-06-24 21:14Subject: Re: [PATCH] drm/amdgpu:use kvcalloc instead of kvmalloc_array Am 23.06.21 um 11:12 schrieb huqiqiao: > kvmalloc_array + __GFP_ZERO is the same with kvcalloc. > > Signed-off-by: huqiqiao > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 ++--- > 1 file changed, 2 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 9acee4a5b2ba..50edc73525b0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -908,9 +908,8 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, > unsigned num_entries; > > num_entries = amdgpu_vm_num_entries(adev, cursor->level); > - entry->entries = kvmalloc_array(num_entries, > - sizeof(*entry->entries), > - GFP_KERNEL | __GFP_ZERO); > + entry->entries = kvcalloc(num_entries, > + sizeof(*entry->entries), GFP_KERNEL); Sounds like a good idea in general, but the indentation on the second line seems to be off. Christian. > if (!entry->entries) > return -ENOMEM; > }
Re: [Intel-gfx] [PATCH 44/47] drm/i915/guc: Connect reset modparam updates to GuC policy flags
On Thu, Jun 24, 2021 at 12:05:13AM -0700, Matthew Brost wrote: > From: John Harrison > > Changing the reset module parameter has no effect on a running GuC. > The corresponding entry in the ADS must be updated and then the GuC > informed via a Host2GuC message. > > The new debugfs interface to module parameters allows this to happen. > However, connecting the parameter data address back to anything useful > is messy. One option would be to pass a new private data structure > address through instead of just the parameter pointer. However, that > means having a new (and different) data structure for each parameter > and a new (and different) write function for each parameter. This > method keeps everything generic by instead using a string lookup on > the directory entry name. > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- > drivers/gpu/drm/i915/i915_debugfs_params.c | 31 ++ > 2 files changed, 32 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > index 2ad5fcd4e1b7..c6d0b762d82c 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > @@ -99,7 +99,7 @@ static int guc_action_policies_update(struct intel_guc > *guc, u32 policy_offset) > policy_offset > }; > > - return intel_guc_send(guc, action, ARRAY_SIZE(action)); > + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, > true); > } > > int intel_guc_global_policies_update(struct intel_guc *guc) > diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c > b/drivers/gpu/drm/i915/i915_debugfs_params.c > index 4e2b077692cb..8ecd8b42f048 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs_params.c > +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c > @@ -6,9 +6,20 @@ > #include > > #include "i915_debugfs_params.h" > +#include "gt/intel_gt.h" > +#include "gt/uc/intel_guc.h" > #include "i915_drv.h" > 
#include "i915_params.h" > > +#define MATCH_DEBUGFS_NODE_NAME(_file, _name) > (strcmp((_file)->f_path.dentry->d_name.name, (_name)) == 0) > + > +#define GET_I915(i915, name, ptr)\ > + do {\ > + struct i915_params *params; \ > + params = container_of(((void *) (ptr)), typeof(*params), name); > \ > + (i915) = container_of(params, typeof(*(i915)), params); \ > + } while(0) > + > /* int param */ > static int i915_param_int_show(struct seq_file *m, void *data) > { > @@ -24,6 +35,16 @@ static int i915_param_int_open(struct inode *inode, struct > file *file) > return single_open(file, i915_param_int_show, inode->i_private); > } > > +static int notify_guc(struct drm_i915_private *i915) > +{ > + int ret = 0; > + > + if (intel_uc_uses_guc_submission(&i915->gt.uc)) > + ret = intel_guc_global_policies_update(&i915->gt.uc.guc); > + > + return ret; > +} > + > static ssize_t i915_param_int_write(struct file *file, > const char __user *ubuf, size_t len, > loff_t *offp) > @@ -81,8 +102,10 @@ static ssize_t i915_param_uint_write(struct file *file, >const char __user *ubuf, size_t len, >loff_t *offp) > { > + struct drm_i915_private *i915; > struct seq_file *m = file->private_data; > unsigned int *value = m->private; > + unsigned int old = *value; > int ret; > > ret = kstrtouint_from_user(ubuf, len, 0, value); > @@ -95,6 +118,14 @@ static ssize_t i915_param_uint_write(struct file *file, > *value = b; > } > > + if (!ret && MATCH_DEBUGFS_NODE_NAME(file, "reset")) { > + GET_I915(i915, reset, value); We might want to make this into a macro in case we need to update more than just "reset" with the GuC going forward but that is not a blocker. With that: Reviewed-by: Matthew Brost > + > + ret = notify_guc(i915); > + if (ret) > + *value = old; > + } > + > return ret ?: len; > } > > -- > 2.28.0 > > ___ > Intel-gfx mailing list > intel-...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 43/47] drm/i915/guc: Hook GuC scheduling policies up
On Thu, Jun 24, 2021 at 12:05:12AM -0700, Matthew Brost wrote: > From: John Harrison > > Use the official driver default scheduling policies for configuring > the GuC scheduler rather than a bunch of hardcoded values. > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost > Cc: Jose Souza > --- > drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + > drivers/gpu/drm/i915/gt/uc/intel_guc.h| 2 + > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 44 ++- > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 +++-- > 4 files changed, 53 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h > b/drivers/gpu/drm/i915/gt/intel_engine_types.h > index 0ceffa2be7a7..37db857bb56c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > @@ -455,6 +455,7 @@ struct intel_engine_cs { > #define I915_ENGINE_IS_VIRTUAL BIT(5) > #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) > #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) > +#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8) > unsigned int flags; > > /* > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > index c38365cd5fab..905ecbc7dbe3 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > @@ -270,6 +270,8 @@ int intel_guc_engine_failure_process_msg(struct intel_guc > *guc, > > void intel_guc_find_hung_context(struct intel_engine_cs *engine); > > +int intel_guc_global_policies_update(struct intel_guc *guc); > + > void intel_guc_submission_reset_prepare(struct intel_guc *guc); > void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); > void intel_guc_submission_reset_finish(struct intel_guc *guc); > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > index d3e86ab7508f..2ad5fcd4e1b7 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > @@ -77,14 +77,54 @@ static u32 guc_ads_blob_size(struct intel_guc *guc) > guc_ads_private_data_size(guc); > } > > -static void guc_policies_init(struct guc_policies *policies) > +static void guc_policies_init(struct intel_guc *guc, struct guc_policies > *policies) > { > + struct intel_gt *gt = guc_to_gt(guc); > + struct drm_i915_private *i915 = gt->i915; > + > policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US; > policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI; > + > policies->global_flags = 0; > + if (i915->params.reset < 2) > + policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; > + > policies->is_valid = 1; > } > > +static int guc_action_policies_update(struct intel_guc *guc, u32 > policy_offset) > +{ > + u32 action[] = { > + INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, > + policy_offset > + }; > + > + return intel_guc_send(guc, action, ARRAY_SIZE(action)); > +} > + > +int intel_guc_global_policies_update(struct intel_guc *guc) > +{ > + struct __guc_ads_blob *blob = guc->ads_blob; > + struct intel_gt *gt = guc_to_gt(guc); > + intel_wakeref_t wakeref; > + int ret; > + > + if (!blob) > + return -ENOTSUPP; > + > + GEM_BUG_ON(!blob->ads.scheduler_policies); > + > + guc_policies_init(guc, &blob->policies); > + > + if (!intel_guc_is_ready(guc)) > + return 0; > + > + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) > + ret = guc_action_policies_update(guc, > blob->ads.scheduler_policies); > + > + return ret; > +} > + > static void guc_mapping_table_init(struct intel_gt *gt, > struct guc_gt_system_info *system_info) > { > @@ -281,7 +321,7 @@ static void __guc_ads_init(struct intel_guc *guc) > u8 engine_class, guc_class; > > /* GuC scheduling policies */ > - guc_policies_init(&blob->policies); > + guc_policies_init(guc, &blob->policies); > > /* >* GuC expects a per-engine-class context image and size > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 6188189314d5..a427336ce916 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -873,6 +873,7 @@ void intel_guc_submission_reset_finish(struct intel_guc > *guc) > GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); > atomic_set(&guc->outstanding_submission_g2h, 0); > > + intel_guc_global_policies_update(guc); > enable_submission(guc); > intel_gt_unpark_heartbeats(guc_to_gt(guc)); > } > @@ -1161,8 +1162,12 @@ static void guc_context_policy_init(struct > intel_engine_cs *engine, > { > desc->policy_flags = 0; > > - desc->execution_
Re: [PATCH v15 00/12] Restricted DMA
On Fri, Jun 25, 2021 at 3:20 AM Konrad Rzeszutek Wilk wrote: > > On Thu, Jun 24, 2021 at 11:55:14PM +0800, Claire Chang wrote: > > This series implements mitigations for lack of DMA access control on > > systems without an IOMMU, which could result in the DMA accessing the > > system memory at unexpected times and/or unexpected addresses, possibly > > leading to data leakage or corruption. > > > > For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is > > not behind an IOMMU. As PCI-e, by design, gives the device full access to > > system memory, a vulnerability in the Wi-Fi firmware could easily escalate > > to a full system exploit (remote wifi exploits: [1a], [1b] that shows a > > full chain of exploits; [2], [3]). > > > > To mitigate the security concerns, we introduce restricted DMA. Restricted > > DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a > > specially allocated region and does memory allocation from the same region. > > The feature on its own provides a basic level of protection against the DMA > > overwriting buffer contents at unexpected times. However, to protect > > against general data leakage and system memory corruption, the system needs > > to provide a way to restrict the DMA to a predefined memory region (this is > > usually done at firmware level, e.g. MPU in ATF on some ARM platforms [4]). 
> > > > [1a] > > https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html > > [1b] > > https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html > > [2] https://blade.tencent.com/en/advisories/qualpwn/ > > [3] > > https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/ > > [4] > > https://github.com/ARM-software/arm-trusted-firmware/blob/master/plat/mediatek/mt8183/drivers/emi_mpu/emi_mpu.c#L132 > > > > v15: > > - Apply Will's diff > > (https://lore.kernel.org/patchwork/patch/1448957/#1647521) > > to fix the crash reported by Qian. > > - Add Stefano's Acked-by tag for patch 01/12 from v14 > > That all should be now be on > > https://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git/ > devel/for-linus-5.14 (and linux-next) > devel/for-linus-5.14 looks good. Thanks!
Re: [PATCH 0/6] KVM: Remove uses of struct page from x86 and arm64 MMU
Excerpts from Paolo Bonzini's message of June 25, 2021 1:35 am: > On 24/06/21 14:57, Nicholas Piggin wrote: >> KVM: Fix page ref underflow for regions with valid but non-refcounted pages > > It doesn't really fix the underflow, it disallows mapping them in the > first place. Since in principle things can break, I'd rather be > explicit, so let's go with "KVM: do not allow mapping valid but > non-reference-counted pages". > >> It's possible to create a region which maps valid but non-refcounted >> pages (e.g., tail pages of non-compound higher order allocations). These >> host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family >> of APIs, which take a reference to the page, which takes it from 0 to 1. >> When the reference is dropped, this will free the page incorrectly. >> >> Fix this by only taking a reference on the page if it was non-zero, > > s/on the page/on valid pages/ (makes clear that invalid pages are fine > without refcounting). That seems okay, you can adjust the title or changelog as you like. > Thank you *so* much, I'm awful at Linux mm. Glad to help. Easy to see why you were taking this approach because the API really does need to be improved and even a subsystem pretty intertwined with mm like KVM shouldn't _really_ be doing this kind of trick (and it should go away when old API is removed). Thanks, Nick
Re: [Intel-gfx] [PATCH 05/47] drm/i915/guc: Add stall timer to non blocking CTB send function
On Thu, Jun 24, 2021 at 07:37:01PM +0200, Michal Wajdeczko wrote: > > > On 24.06.2021 09:04, Matthew Brost wrote: > > Implement a stall timer which fails H2G CTBs once a period of time > > with no forward progress is reached to prevent deadlock. > > > > Also update to ct_write to return -EIO rather than -EPIPE on a > > corrupted descriptor. > > by doing so you will have the same error code for two different problems: > > a) corrupted CTB descriptor (definitely unrecoverable) > b) long stall in CTB processing (still recoverable) > Already discussed both are treated exactly the same by the rest of the stack so we return a single error code. > while caller is explicitly instructed to retry only on: > > c) temporary stall in CTB processing (likely recoverable) > > so why do we want to limit our diagnostics? > > > > > Signed-off-by: John Harrison > > Signed-off-by: Daniele Ceraolo Spurio > > Signed-off-by: Matthew Brost > > --- > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 47 +-- > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 ++ > > 2 files changed, 48 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > index c9a65d05911f..27ec30b5ef47 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > @@ -319,6 +319,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) > > goto err_deregister; > > > > ct->enabled = true; > > + ct->stall_time = KTIME_MAX; > > > > return 0; > > > > @@ -392,7 +393,7 @@ static int ct_write(struct intel_guc_ct *ct, > > unsigned int i; > > > > if (unlikely(ctb->broken)) > > - return -EPIPE; > > + return -EIO; > > > > if (unlikely(desc->status)) > > goto corrupted; > > @@ -464,7 +465,7 @@ static int ct_write(struct intel_guc_ct *ct, > > CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", > > desc->head, desc->tail, desc->status); > > ctb->broken = true; > > - return -EPIPE; > > + return 
-EIO; > > } > > > > /** > > @@ -507,6 +508,18 @@ static int wait_for_ct_request_update(struct > > ct_request *req, u32 *status) > > return err; > > } > > > > +#define GUC_CTB_TIMEOUT_MS 1500 > > it's 150% of core CTB timeout, maybe we should correlate them ? > Seems overkill. > > +static inline bool ct_deadlocked(struct intel_guc_ct *ct) > > +{ > > + long timeout = GUC_CTB_TIMEOUT_MS; > > + bool ret = ktime_ms_delta(ktime_get(), ct->stall_time) > timeout; > > + > > + if (unlikely(ret)) > > + CT_ERROR(ct, "CT deadlocked\n"); > > nit: in commit message you said all these changes are to "prevent > deadlock" so maybe this message should rather be: > > int delta = ktime_ms_delta(ktime_get(), ct->stall_time); > > CT_ERROR(ct, "Communication stalled for %dms\n", delta); > Sure. > (note that CT_ERROR already adds "CT" prefix) > > > + > > + return ret; > > +} > > + > > static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 > > len_dw) > > { > > struct guc_ct_buffer_desc *desc = ctb->desc; > > @@ -518,6 +531,26 @@ static inline bool h2g_has_room(struct > > intel_guc_ct_buffer *ctb, u32 len_dw) > > return space >= len_dw; > > } > > > > +static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw) > > +{ > > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > + > > + lockdep_assert_held(&ct->ctbs.send.lock); > > + > > + if (unlikely(!h2g_has_room(ctb, len_dw))) { > > + if (ct->stall_time == KTIME_MAX) > > + ct->stall_time = ktime_get(); > > + > > + if (unlikely(ct_deadlocked(ct))) > > and maybe above message should be printed somewhere around here when we > detect "deadlock" for the first time? > Not sure I follow. The error message is in the correct place if ask me. Probably should set the broken flag though when the message is printed though. 
> > + return -EIO; > > + else > > + return -EBUSY; > > + } > > + > > + ct->stall_time = KTIME_MAX; > > + return 0; > > +} > > + > > static int ct_send_nb(struct intel_guc_ct *ct, > > const u32 *action, > > u32 len, > > @@ -530,7 +563,7 @@ static int ct_send_nb(struct intel_guc_ct *ct, > > > > spin_lock_irqsave(&ctb->lock, spin_flags); > > > > - ret = h2g_has_room(ctb, len + 1); > > + ret = has_room_nb(ct, len + 1); > > if (unlikely(ret)) > > goto out; > > > > @@ -574,11 +607,19 @@ static int ct_send(struct intel_guc_ct *ct, > > retry: > > spin_lock_irqsave(&ct->ctbs.send.lock, flags); > > if (unlikely(!h2g_has_room(ctb, len + 1))) { > > + if (ct->stall_time == KTIME_MAX) > > + ct->stall_time = ktime_get(); > > as this is a repeated pattern, maybe it should be moved to h2g_has_room > or other
Re: [Intel-gfx] [PATCH 04/47] drm/i915/guc: Add non blocking CTB send function
On Thu, Jun 24, 2021 at 07:02:18PM +0200, Michal Wajdeczko wrote: > > > On 24.06.2021 17:49, Matthew Brost wrote: > > On Thu, Jun 24, 2021 at 04:48:32PM +0200, Michal Wajdeczko wrote: > >> > >> > >> On 24.06.2021 09:04, Matthew Brost wrote: > >>> Add non blocking CTB send function, intel_guc_send_nb. GuC submission > >>> will send CTBs in the critical path and does not need to wait for these > >>> CTBs to complete before moving on, hence the need for this new function. > >>> > >>> The non-blocking CTB now must have a flow control mechanism to ensure > >>> the buffer isn't overrun. A lazy spin wait is used as we believe the > >>> flow control condition should be rare with a properly sized buffer. > >>> > >>> The function, intel_guc_send_nb, is exported in this patch but unused. > >>> Several patches later in the series make use of this function. > >>> > >>> Signed-off-by: John Harrison > >>> Signed-off-by: Matthew Brost > >>> --- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc.h| 12 +++- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 77 +-- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 3 +- > >>> 3 files changed, 82 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> index 4abc59f6f3cd..24b1df6ad4ae 100644 > >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> @@ -74,7 +74,15 @@ static inline struct intel_guc *log_to_guc(struct > >>> intel_guc_log *log) > >>> static > >>> inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 > >>> len) > >>> { > >>> - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); > >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); > >>> +} > >>> + > >>> +#define INTEL_GUC_SEND_NBBIT(31) > >> > >> hmm, this flag really belongs to intel_guc_ct_send() so it should be > >> defined as CTB flag near that function declaration > >> > > > > I can move this up a 
few lines. > > > >>> +static > >>> +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, > >>> u32 len) > >>> +{ > >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, > >>> + INTEL_GUC_SEND_NB); > >>> } > >>> > >>> static inline int > >>> @@ -82,7 +90,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const > >>> u32 *action, u32 len, > >>> u32 *response_buf, u32 response_buf_size) > >>> { > >>> return intel_guc_ct_send(&guc->ct, action, len, > >>> - response_buf, response_buf_size); > >>> + response_buf, response_buf_size, 0); > >>> } > >>> > >>> static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) > >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> index a17215920e58..c9a65d05911f 100644 > >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> @@ -3,6 +3,11 @@ > >>> * Copyright © 2016-2019 Intel Corporation > >>> */ > >>> > >>> +#include > >>> +#include > >>> +#include > >>> +#include > >>> + > >>> #include "i915_drv.h" > >>> #include "intel_guc_ct.h" > >>> #include "gt/intel_gt.h" > >>> @@ -373,7 +378,7 @@ static void write_barrier(struct intel_guc_ct *ct) > >>> static int ct_write(struct intel_guc_ct *ct, > >>> const u32 *action, > >>> u32 len /* in dwords */, > >>> - u32 fence) > >>> + u32 fence, u32 flags) > >>> { > >>> struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > >>> struct guc_ct_buffer_desc *desc = ctb->desc; > >>> @@ -421,9 +426,13 @@ static int ct_write(struct intel_guc_ct *ct, > >>>FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | > >>>FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); > >>> > >>> - hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > >>> - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > >>> - GUC_HXG_REQUEST_MSG_0_DATA0, action[0]); > >>> + hxg = (flags & INTEL_GUC_SEND_NB) ? 
> >>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | > >>> + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | > >>> + GUC_HXG_EVENT_MSG_0_DATA0, action[0])) : > >>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > >>> + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > >>> + GUC_HXG_REQUEST_MSG_0_DATA0, action[0])); > >> > >> or as we already switched to accept and return whole HXG messages in > >> guc_send_mmio() maybe we should do the same for CTB variant too and > >> instead of using extra flag just let caller to prepare proper HXG header > >> with HXG_EVENT type and then in CTB code just look at this type to make > >> decision which code path to use > >> > > > > Not sure I follow. Anyways could this be done in a fo
[Bug 213561] [bisected][regression] GFX10 AMDGPUs can no longer enter idle state after commit. Commit has been pushed to stable branches too.
https://bugzilla.kernel.org/show_bug.cgi?id=213561 Linux_Chemist (untaintablean...@hotmail.co.uk) changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |CODE_FIX --- Comment #10 from Linux_Chemist (untaintablean...@hotmail.co.uk) --- Thank you :) I'll mark this as resolved since the problem is known and code has been reverted ready for the next kernels. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [Intel-gfx] [PATCH 04/47] drm/i915/guc: Add non blocking CTB send function
On Thu, Jun 24, 2021 at 07:02:18PM +0200, Michal Wajdeczko wrote: > > > On 24.06.2021 17:49, Matthew Brost wrote: > > On Thu, Jun 24, 2021 at 04:48:32PM +0200, Michal Wajdeczko wrote: > >> > >> > >> On 24.06.2021 09:04, Matthew Brost wrote: > >>> Add non blocking CTB send function, intel_guc_send_nb. GuC submission > >>> will send CTBs in the critical path and does not need to wait for these > >>> CTBs to complete before moving on, hence the need for this new function. > >>> > >>> The non-blocking CTB now must have a flow control mechanism to ensure > >>> the buffer isn't overrun. A lazy spin wait is used as we believe the > >>> flow control condition should be rare with a properly sized buffer. > >>> > >>> The function, intel_guc_send_nb, is exported in this patch but unused. > >>> Several patches later in the series make use of this function. > >>> > >>> Signed-off-by: John Harrison > >>> Signed-off-by: Matthew Brost > >>> --- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc.h| 12 +++- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 77 +-- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 3 +- > >>> 3 files changed, 82 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> index 4abc59f6f3cd..24b1df6ad4ae 100644 > >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> @@ -74,7 +74,15 @@ static inline struct intel_guc *log_to_guc(struct > >>> intel_guc_log *log) > >>> static > >>> inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 > >>> len) > >>> { > >>> - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); > >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); > >>> +} > >>> + > >>> +#define INTEL_GUC_SEND_NBBIT(31) > >> > >> hmm, this flag really belongs to intel_guc_ct_send() so it should be > >> defined as CTB flag near that function declaration > >> > > > > I can move this up a 
few lines. > > > >>> +static > >>> +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, > >>> u32 len) > >>> +{ > >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, > >>> + INTEL_GUC_SEND_NB); > >>> } > >>> > >>> static inline int > >>> @@ -82,7 +90,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const > >>> u32 *action, u32 len, > >>> u32 *response_buf, u32 response_buf_size) > >>> { > >>> return intel_guc_ct_send(&guc->ct, action, len, > >>> - response_buf, response_buf_size); > >>> + response_buf, response_buf_size, 0); > >>> } > >>> > >>> static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) > >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> index a17215920e58..c9a65d05911f 100644 > >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> @@ -3,6 +3,11 @@ > >>> * Copyright © 2016-2019 Intel Corporation > >>> */ > >>> > >>> +#include > >>> +#include > >>> +#include > >>> +#include > >>> + > >>> #include "i915_drv.h" > >>> #include "intel_guc_ct.h" > >>> #include "gt/intel_gt.h" > >>> @@ -373,7 +378,7 @@ static void write_barrier(struct intel_guc_ct *ct) > >>> static int ct_write(struct intel_guc_ct *ct, > >>> const u32 *action, > >>> u32 len /* in dwords */, > >>> - u32 fence) > >>> + u32 fence, u32 flags) > >>> { > >>> struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > >>> struct guc_ct_buffer_desc *desc = ctb->desc; > >>> @@ -421,9 +426,13 @@ static int ct_write(struct intel_guc_ct *ct, > >>>FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | > >>>FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); > >>> > >>> - hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > >>> - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > >>> - GUC_HXG_REQUEST_MSG_0_DATA0, action[0]); > >>> + hxg = (flags & INTEL_GUC_SEND_NB) ? 
> >>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | > >>> + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | > >>> + GUC_HXG_EVENT_MSG_0_DATA0, action[0])) : > >>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > >>> + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > >>> + GUC_HXG_REQUEST_MSG_0_DATA0, action[0])); > >> > >> or as we already switched to accept and return whole HXG messages in > >> guc_send_mmio() maybe we should do the same for CTB variant too and > >> instead of using extra flag just let caller to prepare proper HXG header > >> with HXG_EVENT type and then in CTB code just look at this type to make > >> decision which code path to use > >> > > > > Not sure I follow. Anyways could this be done in a fo
[PATCH] drm/panel: ws2401: Add driver for WideChips WS2401
This adds a driver for panels based on the WideChips WS2401 display controller. This display controller is used in the Samsung LMS380KF01 display found in the Samsung GT-I8160 (Codina) mobile phone and possibly others. As is common with Samsung displays manufacturer commands are necessary to configure the display to a working state. The display optionally supports internal backlight control, but can also use an external backlight. This driver re-uses the DBI infrastructure to communicate with the display. Cc: phone-de...@vger.kernel.org Cc: Douglas Anderson Cc: Noralf Trønnes Signed-off-by: Linus Walleij --- MAINTAINERS | 7 + drivers/gpu/drm/panel/Kconfig | 9 + drivers/gpu/drm/panel/Makefile| 1 + .../gpu/drm/panel/panel-widechips-ws2401.c| 404 ++ 4 files changed, 421 insertions(+) create mode 100644 drivers/gpu/drm/panel/panel-widechips-ws2401.c diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f..8bfa89f61220 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5946,6 +5946,13 @@ S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/vboxvideo/ +DRM DRIVER FOR WIDECHIPS WS2401 PANELS +M: Linus Walleij +S: Maintained +T: git git://anongit.freedesktop.org/drm/drm-misc +F: Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml +F: drivers/gpu/drm/panel/panel-widechips-ws2401.c + DRM DRIVER FOR VMWARE VIRTUAL GPU M: "VMware Graphics" M: Roland Scheidegger diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 4894913936e9..f4fe1dba9912 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -552,6 +552,15 @@ config DRM_PANEL_VISIONOX_RM69299 Say Y here if you want to enable support for Visionox RM69299 DSI Video Mode panel. 
+config DRM_PANEL_WIDECHIPS_WS2401 + tristate "Widechips WS2401 DPI panel driver" + depends on OF && SPI && GPIOLIB + depends on BACKLIGHT_CLASS_DEVICE + select DRM_MIPI_DBI + help + Say Y here if you want to enable support for the Widechips + WS2401 DPI 480x800 display controller. + config DRM_PANEL_XINPENG_XPP055C272 tristate "Xinpeng XPP055C272 panel driver" depends on OF diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index cae4d976c069..d94c27df17aa 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -58,4 +58,5 @@ obj-$(CONFIG_DRM_PANEL_TPO_TD043MTEA1) += panel-tpo-td043mtea1.o obj-$(CONFIG_DRM_PANEL_TPO_TPG110) += panel-tpo-tpg110.o obj-$(CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA) += panel-truly-nt35597.o obj-$(CONFIG_DRM_PANEL_VISIONOX_RM69299) += panel-visionox-rm69299.o +obj-$(CONFIG_DRM_PANEL_WIDECHIPS_WS2401) += panel-widechips-ws2401.o obj-$(CONFIG_DRM_PANEL_XINPENG_XPP055C272) += panel-xinpeng-xpp055c272.o diff --git a/drivers/gpu/drm/panel/panel-widechips-ws2401.c b/drivers/gpu/drm/panel/panel-widechips-ws2401.c new file mode 100644 index ..d15870301174 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-widechips-ws2401.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Panel driver for the WideChips WS2401 480x800 DPI RGB panel, used in + * the Samsung Mobile Display (SMD) LMS380KF01. + * Found in the Samsung Galaxy Ace 2 GT-I8160 mobile phone. + * Linus Walleij + * Inspired by code and know-how in the vendor driver by Gareth Phillips. 
+ */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define WS2401_RESCTL 0xb8 /* Resolution select control */ +#define WS2401_PSMPS 0xbd /* SMPS positive control */ +#define WS2401_NSMPS 0xbe /* SMPS negative control */ +#define WS2401_SMPS0xbf +#define WS2401_BCMODE 0xc1 /* Backlight control mode */ +#define WS2401_WRBLCTL 0xc3 /* Backlight control */ +#define WS2401_WRDISBV 0xc4 /* Write manual brightness */ +#define WS2401_WRCTRLD 0xc6 /* Write BL control */ +#define WS2401_WRMIE 0xc7 /* Write MIE mode */ +#define WS2401_READ_ID10xda /* Read panel ID 1 */ +#define WS2401_READ_ID20xdb /* Read panel ID 2 */ +#define WS2401_READ_ID30xdc /* Read panel ID 3 */ +#define WS2401_PASSWD1 0xf0 /* Password command for level 2 */ +#define WS2401_DISCTL 0xf2 /* Display control */ +#define WS2401_PWRCTL 0xf3 /* Power control */ +#define WS2401_VCOMCTL 0xf4 /* VCOM control */ +#define WS2401_SRCCTL 0xf5 /* Source control */ +#define WS2401_PANELCTL0xf6 /* Panel control */ + +static const u8 ws2
[PATCH] drm/panel: Add DT bindings for Samsung LMS380KF01
This adds device tree bindings for the Samsung Mobile Displays LMS380KF01 RGB DPI display panel. Cc: devicet...@vger.kernel.org Cc: phone-de...@vger.kernel.org Cc: Douglas Anderson Cc: Noralf Trønnes Signed-off-by: Linus Walleij --- .../display/panel/samsung,lms380kf01.yaml | 96 +++ 1 file changed, 96 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml diff --git a/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml b/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml new file mode 100644 index ..138be12fc509 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/samsung,lms380kf01.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung LMS380KF01 display panel + +description: The LMS380KF01 is a 480x800 DPI display panel from Samsung Mobile + Displays (SMD) utilizing the WideChips WS2401 display controller. It can be + used with internal or external backlight control. + +maintainers: + - Linus Walleij + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: +const: samsung,lms380kf01 + + reg: true + + interrupts: +description: provides an optional ESD (electrostatic discharge) + interrupt that signals abnormalities in the display hardware. + This can also be raised for other reasons like erroneous + configuration. +maxItems: 1 + + reset-gpios: true + + vci-supply: +description: regulator that supplies the VCI analog voltage + usually around 3.0 V + + vccio-supply: +description: regulator that supplies the VCCIO voltage usually + around 1.8 V + + backlight: true + + spi-cpha: +$ref: /schemas/types.yaml#/definitions/flag +description: inherited as a SPI client node. Must be set. 
+ + spi-cpol: +$ref: /schemas/types.yaml#/definitions/flag +description: inherited as a SPI client node. Must be set. + + spi-max-frequency: +$ref: /schemas/types.yaml#/definitions/uint32 +description: inherited as a SPI client node. +maximum: 120 + + port: true + +required: + - compatible + - reg + - spi-cpha + - spi-cpol + +additionalProperties: false + +examples: + - | +#include +#include + +spi { + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { +compatible = "samsung,lms380kf01"; +spi-max-frequency = <120>; +spi-cpha; +spi-cpol; +reg = <0>; +vci-supply = <&lcd_3v0_reg>; +vccio-supply = <&lcd_1v8_reg>; +reset-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>; +interrupt-parent = <&gpio2>; +interrupts = <29 IRQ_TYPE_EDGE_RISING>; + +port { + panel_in: endpoint { +remote-endpoint = <&display_out>; + }; +}; + }; +}; + +... -- 2.31.1
[Bug 213561] [bisected][regression] GFX10 AMDGPUs can no longer enter idle state after commit. Commit has been pushed to stable branches too.
https://bugzilla.kernel.org/show_bug.cgi?id=213561 Alan Swanson (rei...@improbability.net) changed: What|Removed |Added CC||rei...@improbability.net --- Comment #9 from Alan Swanson (rei...@improbability.net) --- These patches have just been reverted for 5.13-rc8 and should hopefully be backported to stable. https://lists.freedesktop.org/archives/amd-gfx/2021-June/065575.html https://lists.freedesktop.org/archives/dri-devel/2021-June/312755.html -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH v4 2/3] dt-bindings: msm: dsi: document phy-type property for 7nm dsi phy
On Thu, Jun 17, 2021 at 10:43:34AM -0400, Jonathan Marek wrote: > Document a new phy-type property which will be used to determine whether > the phy should operate in D-PHY or C-PHY mode. > > Signed-off-by: Jonathan Marek > Reviewed-by: Laurent Pinchart > --- > .../devicetree/bindings/display/msm/dsi-phy-7nm.yaml | 5 + > include/dt-bindings/phy/phy.h| 2 ++ > 2 files changed, 7 insertions(+) > > diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml > b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml > index c0077ca7e9e7..70809d1cac54 100644 > --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml > +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml > @@ -34,6 +34,11 @@ properties: > description: | >Connected to VDD_A_DSI_PLL_0P9 pin (or VDDA_DSI{0,1}_PLL_0P9 for > sm8150) > > + phy-type: > +description: D-PHY (default) or C-PHY mode > +enum: [ 10, 11 ] > +default: 10 > + > required: >- compatible >- reg > diff --git a/include/dt-bindings/phy/phy.h b/include/dt-bindings/phy/phy.h > index 887a31b250a8..f48c9acf251e 100644 > --- a/include/dt-bindings/phy/phy.h > +++ b/include/dt-bindings/phy/phy.h > @@ -20,5 +20,7 @@ > #define PHY_TYPE_XPCS7 > #define PHY_TYPE_SGMII 8 > #define PHY_TYPE_QSGMII 9 > +#define PHY_TYPE_DPHY10 > +#define PHY_TYPE_CPHY11 I thought I recalled a suggestion to add 'MIPI_' in this. Or was there another similar patch? If not, I'm fine either way: Acked-by: Rob Herring
Re: [PATCH] drm/sched: Split drm_sched_job_init
On Thu, Jun 24, 2021 at 11:00 PM Emma Anholt wrote: > > On Thu, Jun 24, 2021 at 1:45 PM Daniel Vetter wrote: > > > > This is a very confusingly named function, because not just does it > > init an object, it arms it and provides a point of no return for > > pushing a job into the scheduler. It would be nice if that's a bit > > clearer in the interface. > > > > But the real reason is that I want to push the dependency tracking > > helpers into the scheduler code, and that means drm_sched_job_init > > must be called a lot earlier, without arming the job. > > > > v2: > > - don't change .gitignore (Steven) > > - don't forget v3d (Emma) > > > > Acked-by: Steven Price > > Signed-off-by: Daniel Vetter > > Cc: Lucas Stach > > Cc: Russell King > > Cc: Christian Gmeiner > > Cc: Qiang Yu > > Cc: Rob Herring > > Cc: Tomeu Vizoso > > Cc: Steven Price > > Cc: Alyssa Rosenzweig > > Cc: David Airlie > > Cc: Daniel Vetter > > Cc: Sumit Semwal > > Cc: "Christian König" > > Cc: Masahiro Yamada > > Cc: Kees Cook > > Cc: Adam Borowski > > Cc: Nick Terrell > > Cc: Mauro Carvalho Chehab > > Cc: Paul Menzel > > Cc: Sami Tolvanen > > Cc: Viresh Kumar > > Cc: Alex Deucher > > Cc: Dave Airlie > > Cc: Nirmoy Das > > Cc: Deepak R Varma > > Cc: Lee Jones > > Cc: Kevin Wang > > Cc: Chen Li > > Cc: Luben Tuikov > > Cc: "Marek Olšák" > > Cc: Dennis Li > > Cc: Maarten Lankhorst > > Cc: Andrey Grodzovsky > > Cc: Sonny Jiang > > Cc: Boris Brezillon > > Cc: Tian Tao > > Cc: Jack Zhang > > Cc: etna...@lists.freedesktop.org > > Cc: l...@lists.freedesktop.org > > Cc: linux-me...@vger.kernel.org > > Cc: linaro-mm-...@lists.linaro.org > > Cc: Emma Anholt > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ > > drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ > > drivers/gpu/drm/lima/lima_sched.c| 2 ++ > > drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ > > drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- > > drivers/gpu/drm/scheduler/sched_fence.c | 
15 ++- > > drivers/gpu/drm/scheduler/sched_main.c | 23 ++- > > drivers/gpu/drm/v3d/v3d_gem.c| 2 ++ > > include/drm/gpu_scheduler.h | 6 +- > > 10 files changed, 52 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > index c5386d13eb4a..a4ec092af9a7 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser > > *p, > > if (r) > > goto error_unlock; > > > > + drm_sched_job_arm(&job->base); > > + > > /* No memory allocation is allowed while holding the notifier lock. > > * The lock is held until amdgpu_cs_submit is finished and fence is > > * added to BOs. > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > index d33e6d97cc89..5ddb955d2315 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct > > drm_sched_entity *entity, > > if (r) > > return r; > > > > + drm_sched_job_arm(&job->base); > > + > > *f = dma_fence_get(&job->base.s_fence->finished); > > amdgpu_job_free_resources(job); > > drm_sched_entity_push_job(&job->base, entity); > > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > index 19826e504efc..af1671f01c7f 100644 > > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity > > *sched_entity, > > if (ret) > > goto out_unlock; > > > > + drm_sched_job_arm(&submit->sched_job); > > + > > submit->out_fence = > > dma_fence_get(&submit->sched_job.s_fence->finished); > > submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, > > submit->out_fence, 0, > > diff --git a/drivers/gpu/drm/lima/lima_sched.c > > 
b/drivers/gpu/drm/lima/lima_sched.c > > index ecf3267334ff..bd1af1fd8c0f 100644 > > --- a/drivers/gpu/drm/lima/lima_sched.c > > +++ b/drivers/gpu/drm/lima/lima_sched.c > > @@ -129,6 +129,8 @@ int lima_sched_task_init(struct lima_sched_task *task, > > return err; > > } > > > > + drm_sched_job_arm(&task->base); > > + > > task->num_bos = num_bos; > > task->vm = lima_vm_get(vm); > > > > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c > > b/drivers/gpu/drm/panfrost/panfrost_job.c > > index beb62c8fc851..1e950534b9b0 100644 > > --- a/drivers/gpu/drm/panfrost/panfrost_job.c > > +++ b/drivers/gpu/drm/p
[Bug 212107] Temperature increase by 15°C on radeon gpu
https://bugzilla.kernel.org/show_bug.cgi?id=212107 miloog (mileikas...@mailbox.org) changed: What|Removed |Added CC||mileikas...@mailbox.org --- Comment #11 from miloog (mileikas...@mailbox.org) --- I can confirm. But in a different scenario. I'm using debian bullseye with lts kernel and latest amdgpu firmware. I don't change any fan control mechanism. 5.10.44 and 5.10.45 works fine but 5.10.46 if i'm only start sway (wayland window manager) my gpu usage is at 100% without doing anything. It's a vega 56. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH] drm/sched: Split drm_sched_job_init
On Thu, Jun 24, 2021 at 1:45 PM Daniel Vetter wrote: > > This is a very confusingly named function, because not just does it > init an object, it arms it and provides a point of no return for > pushing a job into the scheduler. It would be nice if that's a bit > clearer in the interface. > > But the real reason is that I want to push the dependency tracking > helpers into the scheduler code, and that means drm_sched_job_init > must be called a lot earlier, without arming the job. > > v2: > - don't change .gitignore (Steven) > - don't forget v3d (Emma) > > Acked-by: Steven Price > Signed-off-by: Daniel Vetter > Cc: Lucas Stach > Cc: Russell King > Cc: Christian Gmeiner > Cc: Qiang Yu > Cc: Rob Herring > Cc: Tomeu Vizoso > Cc: Steven Price > Cc: Alyssa Rosenzweig > Cc: David Airlie > Cc: Daniel Vetter > Cc: Sumit Semwal > Cc: "Christian König" > Cc: Masahiro Yamada > Cc: Kees Cook > Cc: Adam Borowski > Cc: Nick Terrell > Cc: Mauro Carvalho Chehab > Cc: Paul Menzel > Cc: Sami Tolvanen > Cc: Viresh Kumar > Cc: Alex Deucher > Cc: Dave Airlie > Cc: Nirmoy Das > Cc: Deepak R Varma > Cc: Lee Jones > Cc: Kevin Wang > Cc: Chen Li > Cc: Luben Tuikov > Cc: "Marek Olšák" > Cc: Dennis Li > Cc: Maarten Lankhorst > Cc: Andrey Grodzovsky > Cc: Sonny Jiang > Cc: Boris Brezillon > Cc: Tian Tao > Cc: Jack Zhang > Cc: etna...@lists.freedesktop.org > Cc: l...@lists.freedesktop.org > Cc: linux-me...@vger.kernel.org > Cc: linaro-mm-...@lists.linaro.org > Cc: Emma Anholt > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ > drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ > drivers/gpu/drm/lima/lima_sched.c| 2 ++ > drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ > drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- > drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- > drivers/gpu/drm/scheduler/sched_main.c | 23 ++- > drivers/gpu/drm/v3d/v3d_gem.c| 2 ++ > include/drm/gpu_scheduler.h | 6 +- > 10 files changed, 52 insertions(+), 10 deletions(-) > 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index c5386d13eb4a..a4ec092af9a7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > if (r) > goto error_unlock; > > + drm_sched_job_arm(&job->base); > + > /* No memory allocation is allowed while holding the notifier lock. > * The lock is held until amdgpu_cs_submit is finished and fence is > * added to BOs. > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > index d33e6d97cc89..5ddb955d2315 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct > drm_sched_entity *entity, > if (r) > return r; > > + drm_sched_job_arm(&job->base); > + > *f = dma_fence_get(&job->base.s_fence->finished); > amdgpu_job_free_resources(job); > drm_sched_entity_push_job(&job->base, entity); > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > index 19826e504efc..af1671f01c7f 100644 > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity > *sched_entity, > if (ret) > goto out_unlock; > > + drm_sched_job_arm(&submit->sched_job); > + > submit->out_fence = > dma_fence_get(&submit->sched_job.s_fence->finished); > submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, > submit->out_fence, 0, > diff --git a/drivers/gpu/drm/lima/lima_sched.c > b/drivers/gpu/drm/lima/lima_sched.c > index ecf3267334ff..bd1af1fd8c0f 100644 > --- a/drivers/gpu/drm/lima/lima_sched.c > +++ b/drivers/gpu/drm/lima/lima_sched.c > @@ -129,6 +129,8 @@ int lima_sched_task_init(struct lima_sched_task *task, > return err; > } > > + drm_sched_job_arm(&task->base); > 
+ > task->num_bos = num_bos; > task->vm = lima_vm_get(vm); > > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c > b/drivers/gpu/drm/panfrost/panfrost_job.c > index beb62c8fc851..1e950534b9b0 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_job.c > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c > @@ -244,6 +244,8 @@ int panfrost_job_push(struct panfrost_job *job) > goto unlock; > } > > + drm_sched_job_arm(&job->base); > + > job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); > > ret = panfrost_acquire_object_fences(job->bos, job->bo_
[Bug 213569] Amdgpu temperature reaching dangerous levels
https://bugzilla.kernel.org/show_bug.cgi?id=213569 miloog (mileikas...@mailbox.org) changed: What|Removed |Added CC||mileikas...@mailbox.org --- Comment #1 from miloog (mileikas...@mailbox.org) --- I can confirm. But in a different scenario. I'm using debian bullseye with lts kernel and latest amdgpu firmware. I don't change any fan control mechanism. 5.10.44 and 5.10.45 works fine but 5.10.46 if i'm only start sway (wayland window manager) my gpu usage is at 100% without doing anything. It's a vega 56. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH v2 2/2] drm/panel: Add support for E Ink VB3300-KCA
On Tue, Jun 15, 2021 at 08:33:12PM +1000, Alistair Francis wrote: > Add support for the 10.3" E Ink panel described at: > https://www.eink.com/product.html?type=productdetail&id=7 > > Signed-off-by: Alistair Francis > --- > v2: > - Fix build warning > - Document new string > > .../bindings/display/panel/panel-simple.yaml | 2 ++ > drivers/gpu/drm/panel/panel-simple.c | 29 +++ > 2 files changed, 31 insertions(+) > > diff --git > a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml > b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml > index b3797ba2698b..799e20222551 100644 > --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml > +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml > @@ -128,6 +128,8 @@ properties: > # Emerging Display Technology Corp. WVGA TFT Display with capacitive > touch >- edt,etm0700g0dh6 >- edt,etm0700g0edh6 > +# E Ink VB3300-KCA > + - eink,vb3300-kca Combining this with patch 1 would be preferable. Either way, Acked-by: Rob Herring
Re: [PATCH] drm/v3d: Move drm_sched_job_init to v3d_job_init
On Thu, Jun 24, 2021 at 1:45 PM Daniel Vetter wrote: > > Prep work for using the scheduler dependency handling. We need to call > drm_sched_job_init earlier so we can use the new drm_sched_job_await* > functions for dependency handling here. > > v2: Slightly better commit message and rebase to include the > drm_sched_job_arm() call (Emma). > > Signed-off-by: Daniel Vetter > Cc: Emma Anholt OK, makes sense now. r-b.
[PATCH] drm/sched: Split drm_sched_job_init
This is a very confusingly named function, because not just does it init an object, it arms it and provides a point of no return for pushing a job into the scheduler. It would be nice if that's a bit clearer in the interface. But the real reason is that I want to push the dependency tracking helpers into the scheduler code, and that means drm_sched_job_init must be called a lot earlier, without arming the job. v2: - don't change .gitignore (Steven) - don't forget v3d (Emma) Acked-by: Steven Price Signed-off-by: Daniel Vetter Cc: Lucas Stach Cc: Russell King Cc: Christian Gmeiner Cc: Qiang Yu Cc: Rob Herring Cc: Tomeu Vizoso Cc: Steven Price Cc: Alyssa Rosenzweig Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Masahiro Yamada Cc: Kees Cook Cc: Adam Borowski Cc: Nick Terrell Cc: Mauro Carvalho Chehab Cc: Paul Menzel Cc: Sami Tolvanen Cc: Viresh Kumar Cc: Alex Deucher Cc: Dave Airlie Cc: Nirmoy Das Cc: Deepak R Varma Cc: Lee Jones Cc: Kevin Wang Cc: Chen Li Cc: Luben Tuikov Cc: "Marek Olšák" Cc: Dennis Li Cc: Maarten Lankhorst Cc: Andrey Grodzovsky Cc: Sonny Jiang Cc: Boris Brezillon Cc: Tian Tao Cc: Jack Zhang Cc: etna...@lists.freedesktop.org Cc: l...@lists.freedesktop.org Cc: linux-me...@vger.kernel.org Cc: linaro-mm-...@lists.linaro.org Cc: Emma Anholt --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ drivers/gpu/drm/lima/lima_sched.c| 2 ++ drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- drivers/gpu/drm/scheduler/sched_main.c | 23 ++- drivers/gpu/drm/v3d/v3d_gem.c| 2 ++ include/drm/gpu_scheduler.h | 6 +- 10 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c5386d13eb4a..a4ec092af9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; + drm_sched_job_arm(&job->base); + /* No memory allocation is allowed while holding the notifier lock. * The lock is held until amdgpu_cs_submit is finished and fence is * added to BOs. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index d33e6d97cc89..5ddb955d2315 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; + drm_sched_job_arm(&job->base); + *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); drm_sched_entity_push_job(&job->base, entity); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 19826e504efc..af1671f01c7f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity, if (ret) goto out_unlock; + drm_sched_job_arm(&submit->sched_job); + submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, submit->out_fence, 0, diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index ecf3267334ff..bd1af1fd8c0f 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -129,6 +129,8 @@ int lima_sched_task_init(struct lima_sched_task *task, return err; } + drm_sched_job_arm(&task->base); + task->num_bos = num_bos; task->vm = lima_vm_get(vm); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index beb62c8fc851..1e950534b9b0 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -244,6 
+244,8 @@ int panfrost_job_push(struct panfrost_job *job) goto unlock; } + drm_sched_job_arm(&job->base); + job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); ret = panfrost_acquire_object_fences(job->bos, job->bo_count, diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 79554aa4dbb1..f7347c284886 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -485,9 +485,9 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity
[PATCH] drm/v3d: Move drm_sched_job_init to v3d_job_init
Prep work for using the scheduler dependency handling. We need to call drm_sched_job_init earlier so we can use the new drm_sched_job_await* functions for dependency handling here. v2: Slightly better commit message and rebase to include the drm_sched_job_arm() call (Emma). Signed-off-by: Daniel Vetter Cc: Emma Anholt --- drivers/gpu/drm/v3d/v3d_gem.c | 64 +++ 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 69ac20e11b09..d1028ccf6dd5 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -433,9 +433,10 @@ v3d_wait_bo_ioctl(struct drm_device *dev, void *data, static int v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, struct v3d_job *job, void (*free)(struct kref *ref), -u32 in_sync) +u32 in_sync, enum v3d_queue queue) { struct dma_fence *in_fence = NULL; + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; int ret; job->v3d = v3d; @@ -446,6 +447,10 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, return ret; xa_init_flags(&job->deps, XA_FLAGS_ALLOC); + ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], +v3d_priv); + if (ret) + goto fail; ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, &in_fence); if (ret == -EINVAL) @@ -464,17 +469,9 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, return ret; } -static int -v3d_push_job(struct v3d_file_priv *v3d_priv, -struct v3d_job *job, enum v3d_queue queue) +static void +v3d_push_job(struct v3d_job *job) { - int ret; - - ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], -v3d_priv); - if (ret) - return ret; - drm_sched_job_arm(&job->base); job->done_fence = dma_fence_get(&job->base.s_fence->finished); @@ -483,8 +480,6 @@ v3d_push_job(struct v3d_file_priv *v3d_priv, kref_get(&job->refcount); drm_sched_entity_push_job(&job->base); - - return 0; } static void @@ -530,7 +525,6 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void 
*data, struct drm_file *file_priv) { struct v3d_dev *v3d = to_v3d_dev(dev); - struct v3d_file_priv *v3d_priv = file_priv->driver_priv; struct drm_v3d_submit_cl *args = data; struct v3d_bin_job *bin = NULL; struct v3d_render_job *render; @@ -556,7 +550,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, INIT_LIST_HEAD(&render->unref_list); ret = v3d_job_init(v3d, file_priv, &render->base, - v3d_render_job_free, args->in_sync_rcl); + v3d_render_job_free, args->in_sync_rcl, V3D_RENDER); if (ret) { kfree(render); return ret; @@ -570,7 +564,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, } ret = v3d_job_init(v3d, file_priv, &bin->base, - v3d_job_free, args->in_sync_bcl); + v3d_job_free, args->in_sync_bcl, V3D_BIN); if (ret) { v3d_job_put(&render->base); kfree(bin); @@ -592,7 +586,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, goto fail; } - ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0); + ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0, V3D_CACHE_CLEAN); if (ret) { kfree(clean_job); clean_job = NULL; @@ -615,9 +609,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, mutex_lock(&v3d->sched_lock); if (bin) { - ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN); - if (ret) - goto fail_unreserve; + v3d_push_job(&bin->base); ret = drm_gem_fence_array_add(&render->base.deps, dma_fence_get(bin->base.done_fence)); @@ -625,9 +617,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, goto fail_unreserve; } - ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER); - if (ret) - goto fail_unreserve; + v3d_push_job(&render->base); if (clean_job) { struct dma_fence *render_fence = @@ -635,9 +625,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = drm_gem_fence_array_add(&clean_job->deps, render_fence); if (ret) goto fail_unreserve; - ret = v3d_push_job(v3d_priv, clean_j
Re: [git pull] drm fixes for 5.13-rc8/final
The pull request you sent on Fri, 25 Jun 2021 06:17:22 +1000: > git://anongit.freedesktop.org/drm/drm tags/drm-fixes-2021-06-25 has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/44db63d1ad8d71c6932cbe007eb41f31c434d140 Thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/prtracker.html
[git pull] drm fixes for 5.13-rc8/final
Hi Linus, This is a bit bigger than I'd like at this stage, and I guess last week was extra quiet, but it's mostly one fix across 3 drivers to wait for buffer move pinning to complete. There was one locking change that got reverted so it's just noise. Otherwise the amdgpu/nouveau changes are for known regressions, and otherwise it's just misc changes in kmb/atmel/vc4 drivers. I'm off for a few days (have email for some of it), but I'll be back mid next week for hopefully the next PR. Dave. drm-fixes-2021-06-25: drm fixes for 5.13-rc8/final radeon/nouveau/amdgpu/ttm: - same fix in 3 drivers to wait for BO to be pinned after moving it. core: - auth locking change + brown paper bag revert amdgpu: - Revert GFX9, 10 doorbell fixes, we just end up trading one bug for another - Potential memory corruption fix in framebuffer handling nouveau: - fix regression checking dma addresses kmb: - error return fix atmel-hlcdc: - fix kernel warnings at boot - enable async flips vc4: - fix CPU hang due to power management The following changes since commit 13311e74253fe64329390df80bed3f07314ddd61: Linux 5.13-rc7 (2021-06-20 15:03:15 -0700) are available in the Git repository at: git://anongit.freedesktop.org/drm/drm tags/drm-fixes-2021-06-25 for you to fetch changes up to 5e0e7a407675d9f50f1d840214beaec42293b79b: Merge tag 'drm-misc-fixes-2021-06-24' of git://anongit.freedesktop.org/drm/drm-misc into drm-fixes (2021-06-25 06:05:13 +1000) drm fixes for 5.13-rc8/final radeon/nouveau/amdgpu/ttm: - same fix in 3 drivers to wait for BO to be pinned after moving it. 
core: - auth locking change + brown paper bag revert amdgpu: - Revert GFX9, 10 doorbell fixes, we just end up trading one bug for another - Potential memory corruption fix in framebuffer handling nouveau: - fix regression checking dma addresses kmb: - error return fix atmel-hlcdc: - fix kernel warnings at boot - enable async flips vc4: - fix CPU hang due to power management Christian König (4): drm/nouveau: wait for moving fence after pinning v2 drm/radeon: wait for moving fence after pinning drm/amdgpu: wait for moving fence after pinning drm/nouveau: fix dma_address check for CPU/GPU sync Dan Sneddon (2): drm: atmel_hlcdc: Enable the crtc vblank prior to crtc usage. drm/atmel-hlcdc: Allow async page flips Daniel Vetter (1): Revert "drm: add a locked version of drm_is_current_master" Dave Airlie (2): Merge tag 'amd-drm-fixes-5.13-2021-06-21' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes Merge tag 'drm-misc-fixes-2021-06-24' of git://anongit.freedesktop.org/drm/drm-misc into drm-fixes Desmond Cheong Zhi Xi (1): drm: add a locked version of drm_is_current_master Krzysztof Kozlowski (1): drm/panel: ld9040: reference spi_device_id table Maxime Ripard (2): drm/vc4: hdmi: Move the HSM clock enable to runtime_pm drm/vc4: hdmi: Make sure the controller is powered in detect Michel Dänzer (1): drm/amdgpu: Call drm_framebuffer_init last for framebuffer init Yifan Zhang (2): Revert "drm/amdgpu/gfx10: enlarge CP_MEC_DOORBELL_RANGE_UPPER to cover full doorbell." Revert "drm/amdgpu/gfx9: fix the doorbell missing when in CGPG issue." 
Zhen Lei (1): drm/kmb: Fix error return code in kmb_hw_init() drivers/gpu/drm/amd/amdgpu/amdgpu_display.c| 12 --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c| 14 +++- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +--- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 17 ++ drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c | 1 + drivers/gpu/drm/kmb/kmb_drv.c | 1 + drivers/gpu/drm/nouveau/nouveau_bo.c | 4 +-- drivers/gpu/drm/nouveau/nouveau_prime.c| 17 +- drivers/gpu/drm/panel/panel-samsung-ld9040.c | 1 + drivers/gpu/drm/radeon/radeon_prime.c | 16 -- drivers/gpu/drm/vc4/vc4_hdmi.c | 44 -- 12 files changed, 100 insertions(+), 39 deletions(-)
Re: [PATCH v3 2/2] backlight: lm3630a: convert to atomic PWM API and check for errors
Hi Lee, On Tue, Jun 22, 2021 at 02:12:57PM +0100, Lee Jones wrote: > On Mon, 21 Jun 2021, Uwe Kleine-König wrote: > > > The practical upside here is that this only needs a single API call to > > program the hardware which (depending on the underlying hardware) can > > be more effective and prevents glitches. > > > > Up to now the return value of the pwm functions was ignored. Fix this > > and propagate the error to the caller. > > > > Signed-off-by: Uwe Kleine-König > > --- > > drivers/video/backlight/lm3630a_bl.c | 42 +--- > > 1 file changed, 19 insertions(+), 23 deletions(-) > > Fixed the subject line and applied, thanks. It's not obvious to me what needed fixing here, and I don't find where you pushed the patches, neither in next nor in https://git.kernel.org/pub/scm/linux/kernel/git/lee/backlight.git; so I cannot check what you actually changed. I assume you did s/lm3630a/lm3630a_bl/ ? I didn't because it felt tautological. Best regards Uwe -- Pengutronix e.K. | Uwe Kleine-König| Industrial Linux Solutions | https://www.pengutronix.de/ | signature.asc Description: PGP signature
Re: [PATCH] dma-buf/sync_file: Don't leak fences on merge failure
I don't have drm-misc access. Mind pushing? On Thu, Jun 24, 2021 at 12:59 PM Christian König wrote: > > Am 24.06.21 um 19:47 schrieb Jason Ekstrand: > > Each add_fence() call does a dma_fence_get() on the relevant fence. In > > the error path, we weren't calling dma_fence_put() so all those fences > > got leaked. Also, in the krealloc_array failure case, we weren't > > freeing the fences array. Instead, ensure that i and fences are always > > zero-initialized and dma_fence_put() all the fences and kfree(fences) on > > every error path. > > > > Signed-off-by: Jason Ekstrand > > Fixes: a02b9dc90d84 ("dma-buf/sync_file: refactor fence storage in struct > > sync_file") > > Cc: Gustavo Padovan > > Cc: Christian König > > Reviewed-by: Christian König > > > --- > > drivers/dma-buf/sync_file.c | 13 +++-- > > 1 file changed, 7 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c > > index 20d9bddbb985b..394e6e1e96860 100644 > > --- a/drivers/dma-buf/sync_file.c > > +++ b/drivers/dma-buf/sync_file.c > > @@ -211,8 +211,8 @@ static struct sync_file *sync_file_merge(const char > > *name, struct sync_file *a, > >struct sync_file *b) > > { > > struct sync_file *sync_file; > > - struct dma_fence **fences, **nfences, **a_fences, **b_fences; > > - int i, i_a, i_b, num_fences, a_num_fences, b_num_fences; > > + struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences; > > + int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences; > > > > sync_file = sync_file_alloc(); > > if (!sync_file) > > @@ -236,7 +236,7 @@ static struct sync_file *sync_file_merge(const char > > *name, struct sync_file *a, > >* If a sync_file can only be created with sync_file_merge > >* and sync_file_create, this is a reasonable assumption. 
> >*/ > > - for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { > > + for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { > > struct dma_fence *pt_a = a_fences[i_a]; > > struct dma_fence *pt_b = b_fences[i_b]; > > > > @@ -277,15 +277,16 @@ static struct sync_file *sync_file_merge(const char > > *name, struct sync_file *a, > > fences = nfences; > > } > > > > - if (sync_file_set_fence(sync_file, fences, i) < 0) { > > - kfree(fences); > > + if (sync_file_set_fence(sync_file, fences, i) < 0) > > goto err; > > - } > > > > strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); > > return sync_file; > > > > err: > > + while (i) > > + dma_fence_put(fences[--i]); > > + kfree(fences); > > fput(sync_file->file); > > return NULL; > > >
[PATCH 2/2] drm/ttm, drm/i915: Update ttm_move_memcpy for async use
The buffer object argument to ttm_move_memcpy was only used to determine whether the destination memory should be cleared only or whether we should copy data. Replace it with a "clear" bool, and update the callers. The intention here is to be able to use ttm_move_memcpy() async under a dma-fence as a fallback if an accelerated blit fails in a security- critical path where data might leak if the blit is not properly performed. For that purpose the bo is an unsuitable argument since its relevant members might already have changed at call time. Finally, update the ttm_move_memcpy kerneldoc that seems to have ended up with a stale version. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 20 ++-- include/drm/ttm/ttm_bo_driver.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4e529adcdfc7..f19847abe856 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -517,7 +517,7 @@ static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, obj->ttm.cached_io_st, src_reg->region.start); - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); } } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2f57f824e6db..e3747f069674 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -75,22 +75,21 @@ void ttm_mem_io_free(struct ttm_device *bdev, /** * ttm_move_memcpy - Helper to perform a memcpy ttm move operation. - * @bo: The struct ttm_buffer_object. - * @new_mem: The struct ttm_resource we're moving to (copy destination). - * @new_iter: A struct ttm_kmap_iter representing the destination resource. + * @clear: Whether to clear rather than copy. + * @num_pages: Number of pages of the operation. 
+ * @dst_iter: A struct ttm_kmap_iter representing the destination resource. * @src_iter: A struct ttm_kmap_iter representing the source resource. * * This function is intended to be able to move out async under a * dma-fence if desired. */ -void ttm_move_memcpy(struct ttm_buffer_object *bo, +void ttm_move_memcpy(bool clear, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter) { const struct ttm_kmap_iter_ops *dst_ops = dst_iter->ops; const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; - struct ttm_tt *ttm = bo->ttm; struct dma_buf_map src_map, dst_map; pgoff_t i; @@ -99,10 +98,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, return; /* Don't move nonexistent data. Clear destination instead. */ - if (src_ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm))) { - if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return; - + if (clear) { for (i = 0; i < num_pages; ++i) { dst_ops->map_local(dst_iter, &dst_map, i); if (dst_map.is_iomem) @@ -146,6 +142,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_kmap_iter_linear_io io; } _dst_iter, _src_iter; struct ttm_kmap_iter *dst_iter, *src_iter; + bool clear; int ret = 0; if (ttm && ((ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) || @@ -169,7 +166,10 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, goto out_src_iter; } - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + clear = src_iter->ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))) + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); + src_copy = *src_mem; ttm_bo_move_sync_cleanup(bo, dst_mem); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 68d6069572aa..5f087575194b 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -322,7 +322,7 @@ int ttm_bo_tt_bind(struct ttm_buffer_object *bo, struct ttm_resource *mem); */ void 
ttm_bo_tt_destroy(struct ttm_buffer_object *bo); -void ttm_move_memcpy(struct ttm_buffer_object *bo, +void ttm_move_memcpy(bool clear, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter); -- 2.31.1
[PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat
In order to make the code a bit more readable and to facilitate async memcpy moves, reorganize the move code a little. Determine at an early stage whether to copy or to clear. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++--- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index c39d982c4fa6..4e529adcdfc7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, } static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + bool clear, struct ttm_resource *dst_mem, struct sg_table *dst_st) { @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return -EINVAL; dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); - if (!ttm || !ttm_tt_is_populated(ttm)) { + if (clear) { if (bo->type == ttm_bo_type_kernel) return -EINVAL; - if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return 0; - intel_engine_pm_get(i915->gt.migrate.context->engine); ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, dst_st->sgl, dst_level, @@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return ret; } -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, -struct ttm_operation_ctx *ctx, -struct ttm_resource *dst_mem, -struct ttm_place *hop) +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct ttm_resource_manager *dst_man = - ttm_manager_type(bo->bdev, dst_mem->mem_type); struct intel_memory_region *dst_reg, *src_reg; union { struct ttm_kmap_iter_tt tt; struct ttm_kmap_iter_iomap io; } _dst_iter, _src_iter; struct ttm_kmap_iter *dst_iter, *src_iter; - struct sg_table *dst_st; int ret; dst_reg = 
i915_ttm_region(bo->bdev, dst_mem->mem_type); src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); GEM_BUG_ON(!dst_reg || !src_reg); + ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st); + if (ret) { + dst_iter = !cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, +dst_st, dst_reg->region.start); + + src_iter = !cpu_maps_iomem(bo->resource) ? + ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, +obj->ttm.cached_io_st, +src_reg->region.start); + + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + } +} + +static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, +struct ttm_operation_ctx *ctx, +struct ttm_resource *dst_mem, +struct ttm_place *hop) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); + struct ttm_tt *ttm = bo->ttm; + struct sg_table *dst_st; + bool clear; + int ret; + /* Sync for now. We could do the actual copy async. */ ret = ttm_bo_wait_ctx(bo, ctx); if (ret) @@ -526,9 +550,8 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, } /* Populate ttm with pages if needed. Typically system memory. */ - if (bo->ttm && (dst_man->use_tt || - (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { - ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); + if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, ttm, ctx); if (ret) return ret; } @@ -537,23 +560,10 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, if (IS_ERR(dst_st)) return PTR_ERR(dst_st); - ret = i915_ttm_accel_move(bo, dst_mem, dst_st); - if (ret) { -
[PATCH 0/2] drm/i915, drm/ttm: Update the ttm_move_memcpy() interface
The ttm_move_memcpy() function was intended to be able to be used async under a fence. We are going to utilize that as a fallback if the gpu clearing blit fails before we set up CPU- or GPU ptes to the memory region. But to accomplish that the bo argument to ttm_move_memcpy() needs to be replaced. Patch 1 reorganizes the i915 ttm move code a bit to make the change in patch 2 smaller. Patch 2 updates the ttm_move_memcpy() interface. Thomas Hellström (2): drm/i915/ttm: Reorganize the ttm move code somewhat drm/ttm, drm/i915: Update ttm_move_memcpy for async use drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++--- drivers/gpu/drm/ttm/ttm_bo_util.c | 20 +++ include/drm/ttm/ttm_bo_driver.h | 2 +- 3 files changed, 51 insertions(+), 41 deletions(-) -- 2.31.1
Re: [PATCH v2 0/2] R-Car DU: Add DSI encoder driver for V3U
Hi Laurent, On Wed, Jun 23, 2021 at 04:56:37PM +0300, Laurent Pinchart wrote: > Hello, > > This patch series adds a driver for the DSI encoder found in the R-Car > V3U SoC, which is the first SoC in the family that supports DSI. > > The driver is based on an implementation from the BSP written by Luu > Hoai, with lots of further rework. If anyone is interested in the > history, a broken down v1 is available from [1]. > > The driver has been successfully tested on the Falcon board, which > connects an SN65DSI86 to the output of the DSI encoder. Further patches > are needed for the ti-sn65dsi86 driver to enable usage in plain DP > (non-eDP) mode, as the Falcon board has a mini-DP connector wired to the > output of the SN65DSI86. This is work in progress (see [2]). > > [1] > https://lore.kernel.org/linux-renesas-soc/189c7a07-29cd-39f4-1dec-56aa94c11...@ideasonboard.com/T/#mafbe130386cbc8326f85cad46860813516ef4a80 > [2] > https://lore.kernel.org/linux-renesas-soc/20210322030128.2283-1-laurent.pinchart+rene...@ideasonboard.com/ > > LUU HOAI (1): > drm: rcar-du: Add R-Car DSI driver > > Laurent Pinchart (1): > dt-bindings: display: bridge: Add binding for R-Car MIPI DSI/CSI-2 TX I have browsed both patches. Both looks good: Acked-by: Sam Ravnborg Sam
Re: [PATCH v14 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing
On Thu, Jun 24, 2021 at 11:58:57PM +0800, Claire Chang wrote: > On Thu, Jun 24, 2021 at 11:56 PM Konrad Rzeszutek Wilk > wrote: > > > > On Thu, Jun 24, 2021 at 10:10:51AM -0400, Qian Cai wrote: > > > > > > > > > On 6/24/2021 7:48 AM, Will Deacon wrote: > > > > Ok, diff below which attempts to tackle the offset issue I mentioned as > > > > well. Qian Cai -- please can you try with these changes? > > > > > > This works fine. > > > > Cool. Let me squash this patch in #6 and rebase the rest of them. > > > > Claire, could you check the devel/for-linus-5.14 say by end of today to > > double check that I didn't mess anything up please? > > I just submitted v15 here > (https://lore.kernel.org/patchwork/cover/1451322/) in case it's > helpful. Oh! Nice! > I'll double check of course. Thanks for the efforts! I ended up using your patch #6 and #7. Please double-check.
Re: [PATCH v15 00/12] Restricted DMA
On Thu, Jun 24, 2021 at 11:55:14PM +0800, Claire Chang wrote: > This series implements mitigations for lack of DMA access control on > systems without an IOMMU, which could result in the DMA accessing the > system memory at unexpected times and/or unexpected addresses, possibly > leading to data leakage or corruption. > > For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is > not behind an IOMMU. As PCI-e, by design, gives the device full access to > system memory, a vulnerability in the Wi-Fi firmware could easily escalate > to a full system exploit (remote wifi exploits: [1a], [1b] that shows a > full chain of exploits; [2], [3]). > > To mitigate the security concerns, we introduce restricted DMA. Restricted > DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a > specially allocated region and does memory allocation from the same region. > The feature on its own provides a basic level of protection against the DMA > overwriting buffer contents at unexpected times. However, to protect > against general data leakage and system memory corruption, the system needs > to provide a way to restrict the DMA to a predefined memory region (this is > usually done at firmware level, e.g. MPU in ATF on some ARM platforms [4]). > > [1a] > https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html > [1b] > https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html > [2] https://blade.tencent.com/en/advisories/qualpwn/ > [3] > https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/ > [4] > https://github.com/ARM-software/arm-trusted-firmware/blob/master/plat/mediatek/mt8183/drivers/emi_mpu/emi_mpu.c#L132 > > v15: > - Apply Will's diff (https://lore.kernel.org/patchwork/patch/1448957/#1647521) > to fix the crash reported by Qian. 
> - Add Stefano's Acked-by tag for patch 01/12 from v14 That all should now be on https://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git/ devel/for-linus-5.14 (and linux-next)
[PULL] drm-misc-fixes
Hi Dave, Daniel, Here's this week drm-misc-fixes PR Thanks! Maxime drm-misc-fixes-2021-06-24: A DMA address check for nouveau, an error code return fix for kmb, fixes to wait for a moving fence after pinning the BO for amdgpu, nouveau and radeon, a crtc and async page flip fix for atmel-hlcdc and a cpu hang fix for vc4. The following changes since commit c336a5ee984708db4826ef9e47d184e638e29717: drm: Lock pointer access in drm_master_release() (2021-06-10 12:22:02 +0200) are available in the Git repository at: git://anongit.freedesktop.org/drm/drm-misc tags/drm-misc-fixes-2021-06-24 for you to fetch changes up to d330099115597bbc238d6758a4930e72b49ea9ba: drm/nouveau: fix dma_address check for CPU/GPU sync (2021-06-24 15:40:44 +0200) A DMA address check for nouveau, an error code return fix for kmb, fixes to wait for a moving fence after pinning the BO for amdgpu, nouveau and radeon, a crtc and async page flip fix for atmel-hlcdc and a cpu hang fix for vc4. Christian König (4): drm/nouveau: wait for moving fence after pinning v2 drm/radeon: wait for moving fence after pinning drm/amdgpu: wait for moving fence after pinning drm/nouveau: fix dma_address check for CPU/GPU sync Dan Sneddon (2): drm: atmel_hlcdc: Enable the crtc vblank prior to crtc usage. 
drm/atmel-hlcdc: Allow async page flips Daniel Vetter (1): Revert "drm: add a locked version of drm_is_current_master" Desmond Cheong Zhi Xi (1): drm: add a locked version of drm_is_current_master Krzysztof Kozlowski (1): drm/panel: ld9040: reference spi_device_id table Maxime Ripard (2): drm/vc4: hdmi: Move the HSM clock enable to runtime_pm drm/vc4: hdmi: Make sure the controller is powered in detect Zhen Lei (1): drm/kmb: Fix error return code in kmb_hw_init() drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c| 14 +++- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 17 ++ drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c | 1 + drivers/gpu/drm/kmb/kmb_drv.c | 1 + drivers/gpu/drm/nouveau/nouveau_bo.c | 4 +-- drivers/gpu/drm/nouveau/nouveau_prime.c| 17 +- drivers/gpu/drm/panel/panel-samsung-ld9040.c | 1 + drivers/gpu/drm/radeon/radeon_prime.c | 14 ++-- drivers/gpu/drm/vc4/vc4_hdmi.c | 44 -- 9 files changed, 90 insertions(+), 23 deletions(-) signature.asc Description: PGP signature
Re: [PATCH 2/6] KVM: mmu: also return page from gfn_to_pfn
Excerpts from Paolo Bonzini's message of June 24, 2021 7:42 pm: > On 24/06/21 10:52, Nicholas Piggin wrote: >>> For now, wrap all calls to gfn_to_pfn functions in the new helper >>> function. Callers which don't need the page struct will be updated in >>> follow-up patches. >> Hmm. You mean callers that do need the page will be updated? Normally >> if there will be leftover users that don't need the struct page then >> you would go the other way and keep the old call the same, and add a new >> one (gfn_to_pfn_page) just for those that need it. > > Needing kvm_pfn_page_unwrap is a sign that something might be buggy, so > it's a good idea to move the short name to the common case and the ugly > kvm_pfn_page_unwrap(gfn_to_pfn(...)) for the weird one. In fact I'm not > sure there should be any kvm_pfn_page_unwrap in the end. If all callers were updated that is one thing, but from the changelog it sounds like that would not happen and there would be some gfn_to_pfn users left over. But yes in the end you would either need to make gfn_to_pfn never return a page found via follow_pte, or change all callers to the new way. If the plan is for the latter then I guess that's fine. Thanks, Nick
Re: [PATCH 2/6] KVM: mmu: also return page from gfn_to_pfn
Excerpts from Paolo Bonzini's message of June 24, 2021 8:21 pm: > On 24/06/21 12:17, Nicholas Piggin wrote: >>> If all callers were updated that is one thing, but from the changelog >>> it sounds like that would not happen and there would be some gfn_to_pfn >>> users left over. >>> >>> But yes in the end you would either need to make gfn_to_pfn never return >>> a page found via follow_pte, or change all callers to the new way. If >>> the plan is for the latter then I guess that's fine. >> >> Actually in that case anyway I don't see the need -- the existence of >> gfn_to_pfn is enough to know it might be buggy. It can just as easily >> be grepped for as kvm_pfn_page_unwrap. > > Sure, but that would leave us with longer function names > (gfn_to_pfn_page* instead of gfn_to_pfn*). So the "safe" use is the one > that looks worse and the unsafe use is the one that looks safe. The churn isn't justified because of function name length. Choose g2pp() if you want a non-descriptive but short name. The existing name isn't good anyway because it not only looks up a pfn but also a page, and more importantly it gets a ref on the page. The name should be changed if you introduce a new API. >> And are gfn_to_page cases also >> vulnerable to the same issue? > > No, they're just broken for the VM_IO|VM_PFNMAP case. No they aren't vulnerable, or they are vulnerable but also broken in other cases? Thanks, Nick
Re: [PATCH 06/11] drm/v3d: Move drm_sched_job_init to v3d_job_init
On Thu, Jun 24, 2021 at 7:00 AM Daniel Vetter wrote: > > Prep work for using the scheduler dependency handling. > > Signed-off-by: Daniel Vetter > Cc: Emma Anholt Back when I wrote this, I think there were rules that there had to be no failure paths between a job_init and a push. Has that changed? I really don't have the context to evaluate this, I'm not sure what new "scheduler dependency handling" is given that there was already something that I considered to be dependency handling!
[PATCH 08/12] media: hantro: Add H.264 support for Rockchip VDPU2
From: Jonas Karlman Rockchip VDPU2 core is present on RK3328, RK3326/PX30, RK3399 and others. It's similar to Hantro G1, but it's not compatible with it. Signed-off-by: Jonas Karlman Signed-off-by: Ezequiel Garcia --- drivers/staging/media/hantro/Makefile | 1 + drivers/staging/media/hantro/hantro_hw.h | 1 + .../media/hantro/rockchip_vpu2_hw_h264_dec.c | 491 ++ 3 files changed, 493 insertions(+) create mode 100644 drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c diff --git a/drivers/staging/media/hantro/Makefile b/drivers/staging/media/hantro/Makefile index 287370188d2a..90036831fec4 100644 --- a/drivers/staging/media/hantro/Makefile +++ b/drivers/staging/media/hantro/Makefile @@ -13,6 +13,7 @@ hantro-vpu-y += \ hantro_g2_hevc_dec.o \ hantro_g1_vp8_dec.o \ rockchip_vpu2_hw_jpeg_enc.o \ + rockchip_vpu2_hw_h264_dec.o \ rockchip_vpu2_hw_mpeg2_dec.o \ rockchip_vpu2_hw_vp8_dec.o \ hantro_jpeg.o \ diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index 7a8048afe357..9296624654a6 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -241,6 +241,7 @@ dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx); int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx); +int rockchip_vpu2_h264_dec_run(struct hantro_ctx *ctx); int hantro_g1_h264_dec_run(struct hantro_ctx *ctx); int hantro_h264_dec_init(struct hantro_ctx *ctx); void hantro_h264_dec_exit(struct hantro_ctx *ctx); diff --git a/drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c b/drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c new file mode 100644 index ..64a6330475eb --- /dev/null +++ b/drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (c) 2014 Rockchip Electronics Co., Ltd. 
+ * Hertz Wong + * Herman Chen + * + * Copyright (C) 2014 Google, Inc. + * Tomasz Figa + */ + +#include +#include + +#include + +#include "hantro_hw.h" +#include "hantro_v4l2.h" + +#define VDPU_SWREG(nr) ((nr) * 4) + +#define VDPU_REG_DEC_OUT_BASE VDPU_SWREG(63) +#define VDPU_REG_RLC_VLC_BASE VDPU_SWREG(64) +#define VDPU_REG_QTABLE_BASE VDPU_SWREG(61) +#define VDPU_REG_DIR_MV_BASE VDPU_SWREG(62) +#define VDPU_REG_REFER_BASE(i) (VDPU_SWREG(84 + (i))) +#define VDPU_REG_DEC_E(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_DEC_ADV_PRE_DIS(v)((v) ? BIT(11) : 0) +#define VDPU_REG_DEC_SCMD_DIS(v) ((v) ? BIT(10) : 0) +#define VDPU_REG_FILTERING_DIS(v) ((v) ? BIT(8) : 0) +#define VDPU_REG_PIC_FIXED_QUANT(v)((v) ? BIT(7) : 0) +#define VDPU_REG_DEC_LATENCY(v)(((v) << 1) & GENMASK(6, 1)) + +#define VDPU_REG_INIT_QP(v)(((v) << 25) & GENMASK(30, 25)) +#define VDPU_REG_STREAM_LEN(v) (((v) << 0) & GENMASK(23, 0)) + +#define VDPU_REG_APF_THRESHOLD(v) (((v) << 17) & GENMASK(30, 17)) +#define VDPU_REG_STARTMB_X(v) (((v) << 8) & GENMASK(16, 8)) +#define VDPU_REG_STARTMB_Y(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_DEC_MODE(v) (((v) << 0) & GENMASK(3, 0)) + +#define VDPU_REG_DEC_STRENDIAN_E(v)((v) ? BIT(5) : 0) +#define VDPU_REG_DEC_STRSWAP32_E(v)((v) ? BIT(4) : 0) +#define VDPU_REG_DEC_OUTSWAP32_E(v)((v) ? BIT(3) : 0) +#define VDPU_REG_DEC_INSWAP32_E(v) ((v) ? BIT(2) : 0) +#define VDPU_REG_DEC_OUT_ENDIAN(v) ((v) ? BIT(1) : 0) +#define VDPU_REG_DEC_IN_ENDIAN(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_DEC_DATA_DISC_E(v)((v) ? BIT(22) : 0) +#define VDPU_REG_DEC_MAX_BURST(v) (((v) << 16) & GENMASK(20, 16)) +#define VDPU_REG_DEC_AXI_WR_ID(v) (((v) << 8) & GENMASK(15, 8)) +#define VDPU_REG_DEC_AXI_RD_ID(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_START_CODE_E(v) ((v) ? BIT(22) : 0) +#define VDPU_REG_CH_8PIX_ILEAV_E(v)((v) ? BIT(21) : 0) +#define VDPU_REG_RLC_MODE_E(v) ((v) ? BIT(20) : 0) +#define VDPU_REG_PIC_INTERLACE_E(v)((v) ? 
BIT(17) : 0) +#define VDPU_REG_PIC_FIELDMODE_E(v)((v) ? BIT(16) : 0) +#define VDPU_REG_PIC_TOPFIELD_E(v) ((v) ? BIT(13) : 0) +#define VDPU_REG_WRITE_MVS_E(v)((v) ? BIT(10) : 0) +#define VDPU_REG_SEQ_MBAFF_E(v)((v) ? BIT(7) : 0) +#define VDPU_REG_PICORD_COUNT_E(v) ((v) ? BIT(6) : 0) +#define VDPU_REG_DEC_TIMEOUT_E(v) ((v) ? BIT(5) : 0) +#define VDPU_REG_DEC_CLK_GATE_E(v) ((v) ? BIT(4) : 0) + +#define VDPU_REG_PRED_BC_TAP_0_0(v)(((v) << 22) & GENMASK(31, 22)) +#define VDPU_REG_PRED_BC_TAP_0_1(v)(((v) << 12) & GENMASK(21, 12)) +#define VDPU_REG_PRED_BC_TAP_0_2(v)(((v) << 2)
[PATCH 04/12] hantro: Make struct hantro_variant.init() optional
The hantro_variant.init() function is there for platforms to perform hardware-specific initialization, such as clock rate bumping. Not all platforms require it, so make it optional. Signed-off-by: Ezequiel Garcia --- drivers/staging/media/hantro/hantro.h | 4 ++-- drivers/staging/media/hantro/hantro_drv.c | 10 ++ drivers/staging/media/hantro/sama5d4_vdec_hw.c | 6 -- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/staging/media/hantro/hantro.h b/drivers/staging/media/hantro/hantro.h index a70c386de6f1..c2e2dca38628 100644 --- a/drivers/staging/media/hantro/hantro.h +++ b/drivers/staging/media/hantro/hantro.h @@ -61,8 +61,8 @@ struct hantro_irq { * @num_postproc_fmts: Number of post-processor formats. * @codec: Supported codecs * @codec_ops: Codec ops. - * @init: Initialize hardware. - * @runtime_resume:reenable hardware after power gating + * @init: Initialize hardware, optional. + * @runtime_resume:reenable hardware after power gating, optional. * @irqs: array of irq names and interrupt handlers * @num_irqs: number of irqs in the array * @clk_names: array of clock names diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c index 31d8449ca1d2..9b5415176bfe 100644 --- a/drivers/staging/media/hantro/hantro_drv.c +++ b/drivers/staging/media/hantro/hantro_drv.c @@ -942,10 +942,12 @@ static int hantro_probe(struct platform_device *pdev) } } - ret = vpu->variant->init(vpu); - if (ret) { - dev_err(&pdev->dev, "Failed to init VPU hardware\n"); - return ret; + if (vpu->variant->init) { + ret = vpu->variant->init(vpu); + if (ret) { + dev_err(&pdev->dev, "Failed to init VPU hardware\n"); + return ret; + } } pm_runtime_set_autosuspend_delay(vpu->dev, 100); diff --git a/drivers/staging/media/hantro/sama5d4_vdec_hw.c b/drivers/staging/media/hantro/sama5d4_vdec_hw.c index 58ae72c2b723..9c3b8cd0b239 100644 --- a/drivers/staging/media/hantro/sama5d4_vdec_hw.c +++ b/drivers/staging/media/hantro/sama5d4_vdec_hw.c @@ 
-64,11 +64,6 @@ static const struct hantro_fmt sama5d4_vdec_fmts[] = { }, }; -static int sama5d4_hw_init(struct hantro_dev *vpu) -{ - return 0; -} - /* * Supported codec ops. */ @@ -109,7 +104,6 @@ const struct hantro_variant sama5d4_vdec_variant = { .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | HANTRO_H264_DECODER, .codec_ops = sama5d4_vdec_codec_ops, - .init = sama5d4_hw_init, .irqs = sama5d4_irqs, .num_irqs = ARRAY_SIZE(sama5d4_irqs), .clk_names = sama5d4_clk_names, -- 2.30.0
Re: [PATCH 0/6] KVM: Remove uses of struct page from x86 and arm64 MMU
Excerpts from Paolo Bonzini's message of June 24, 2021 10:41 pm: > On 24/06/21 13:42, Nicholas Piggin wrote: >> Excerpts from Nicholas Piggin's message of June 24, 2021 8:34 pm: >>> Excerpts from David Stevens's message of June 24, 2021 1:57 pm: KVM supports mapping VM_IO and VM_PFNMAP memory into the guest by using follow_pte in gfn_to_pfn. However, the resolved pfns may not have associated struct pages, so they should not be passed to pfn_to_page. This series removes such calls from the x86 and arm64 secondary MMU. To do this, this series modifies gfn_to_pfn to return a struct page in addition to a pfn, if the hva was resolved by gup. This allows the caller to call put_page only when necessitated by gup. This series provides a helper function that unwraps the new return type of gfn_to_pfn to provide behavior identical to the old behavior. As I have no hardware to test powerpc/mips changes, the function is used there for minimally invasive changes. Additionally, as gfn_to_page and gfn_to_pfn_cache are not integrated with mmu notifier, they cannot be easily changed over to only use pfns. This addresses CVE-2021-22543 on x86 and arm64. >>> >>> Does this fix the problem? (untested I don't have a POC setup at hand, >>> but at least in concept) >> >> This one actually compiles at least. Unfortunately I don't have much >> time in the near future to test, and I only just found out about this >> CVE a few hours ago. > And it also works (the reproducer gets an infinite stream of userspace > exits and especially does not crash). We can still go for David's > solution later since MMU notifiers are able to deal with this pages, but > it's a very nice patch for stable kernels. Oh nice, thanks for testing. How's this? Thanks, Nick --- KVM: Fix page ref underflow for regions with valid but non-refcounted pages It's possible to create a region which maps valid but non-refcounted pages (e.g., tail pages of non-compound higher order allocations). 
These host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family of APIs, which take a reference to the page, which takes it from 0 to 1. When the reference is dropped, this will free the page incorrectly. Fix this by only taking a reference on the page if it was non-zero, which indicates it is participating in normal refcounting (and can be released with put_page). Signed-off-by: Nicholas Piggin --- virt/kvm/kvm_main.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a6bc7af0e28..46fb042837d2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2055,6 +2055,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) return true; } +static int kvm_try_get_pfn(kvm_pfn_t pfn) +{ + if (kvm_is_reserved_pfn(pfn)) + return 1; + return get_page_unless_zero(pfn_to_page(pfn)); +} + static int hva_to_pfn_remapped(struct vm_area_struct *vma, unsigned long addr, bool *async, bool write_fault, bool *writable, @@ -2104,13 +2111,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * Whoever called remap_pfn_range is also going to call e.g. * unmap_mapping_range before the underlying pages are freed, * causing a call to our MMU notifier. +* +* Certain IO or PFNMAP mappings can be backed with valid +* struct pages, but be allocated without refcounting e.g., +* tail pages of non-compound higher order allocations, which +* would then underflow the refcount when the caller does the +* required put_page. Don't allow those pages here. */ - kvm_get_pfn(pfn); + if (!kvm_try_get_pfn(pfn)) + r = -EFAULT; out: pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; - return 0; + + return r; } /* -- 2.23.0
[PATCH 09/12] media: hantro: Enable H.264 on Rockchip VDPU2
Given H.264 support for VDPU2 was just added, let's enable it. For now, this is only enabled on platform that don't have an RKVDEC core, such as RK3328. Signed-off-by: Ezequiel Garcia --- .../staging/media/hantro/rockchip_vpu_hw.c| 26 ++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c index 3ccc16413f42..e4e3b5e7689b 100644 --- a/drivers/staging/media/hantro/rockchip_vpu_hw.c +++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c @@ -162,6 +162,19 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = { .fourcc = V4L2_PIX_FMT_NV12, .codec_mode = HANTRO_MODE_NONE, }, + { + .fourcc = V4L2_PIX_FMT_H264_SLICE, + .codec_mode = HANTRO_MODE_H264_DEC, + .max_depth = 2, + .frmsize = { + .min_width = 48, + .max_width = 1920, + .step_width = MB_DIM, + .min_height = 48, + .max_height = 1088, + .step_height = MB_DIM, + }, + }, { .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, .codec_mode = HANTRO_MODE_MPEG2_DEC, @@ -388,6 +401,12 @@ static const struct hantro_codec_ops rk3399_vpu_codec_ops[] = { .init = hantro_jpeg_enc_init, .exit = hantro_jpeg_enc_exit, }, + [HANTRO_MODE_H264_DEC] = { + .run = rockchip_vpu2_h264_dec_run, + .reset = rockchip_vpu2_dec_reset, + .init = hantro_h264_dec_init, + .exit = hantro_h264_dec_exit, + }, [HANTRO_MODE_MPEG2_DEC] = { .run = rockchip_vpu2_mpeg2_dec_run, .reset = rockchip_vpu2_dec_reset, @@ -433,6 +452,8 @@ static const char * const rockchip_vpu_clk_names[] = { "aclk", "hclk" }; +/* VDPU1/VEPU1 */ + const struct hantro_variant rk3036_vpu_variant = { .dec_offset = 0x400, .dec_fmts = rk3066_vpu_dec_fmts, @@ -495,11 +516,14 @@ const struct hantro_variant rk3288_vpu_variant = { .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) }; +/* VDPU2/VEPU2 */ + const struct hantro_variant rk3328_vpu_variant = { .dec_offset = 0x400, .dec_fmts = rk3399_vpu_dec_fmts, .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts), - .codec = HANTRO_MPEG2_DECODER | 
HANTRO_VP8_DECODER, + .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | +HANTRO_H264_DECODER, .codec_ops = rk3399_vpu_codec_ops, .irqs = rockchip_vdpu2_irqs, .num_irqs = ARRAY_SIZE(rockchip_vdpu2_irqs), -- 2.30.0
Re: [PATCH 3/6] KVM: x86/mmu: avoid struct page in MMU
Excerpts from David Stevens's message of June 24, 2021 1:57 pm: > From: David Stevens > out_unlock: > if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) > read_unlock(&vcpu->kvm->mmu_lock); > else > write_unlock(&vcpu->kvm->mmu_lock); > - kvm_release_pfn_clean(pfn); > + if (pfnpg.page) > + put_page(pfnpg.page); > return r; > } How about kvm_release_pfn_page_clean(pfnpg); Thanks, Nick
[PATCH 07/12] media: hantro: h264: Move reference picture number to a helper
Add a hantro_h264_get_ref_nbr() helper function to get the reference picture numbers. This will be used by the Rockchip VDPU2 H.264 driver. Signed-off-by: Ezequiel Garcia --- drivers/staging/media/hantro/hantro_g1_h264_dec.c | 14 ++ drivers/staging/media/hantro/hantro_h264.c| 11 +++ drivers/staging/media/hantro/hantro_hw.h | 2 ++ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c index 6faacfc44c7c..236ce24ca00c 100644 --- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c @@ -126,7 +126,6 @@ static void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) static void set_ref(struct hantro_ctx *ctx) { - struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; const u8 *b0_reflist, *b1_reflist, *p_reflist; struct hantro_dev *vpu = ctx->dev; int reg_num; @@ -143,17 +142,8 @@ static void set_ref(struct hantro_ctx *ctx) * subsequential reference pictures. 
*/ for (i = 0; i < HANTRO_H264_DPB_SIZE; i += 2) { - reg = 0; - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - reg |= G1_REG_REF_PIC_REFER0_NBR(dpb[i].pic_num); - else - reg |= G1_REG_REF_PIC_REFER0_NBR(dpb[i].frame_num); - - if (dpb[i + 1].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - reg |= G1_REG_REF_PIC_REFER1_NBR(dpb[i + 1].pic_num); - else - reg |= G1_REG_REF_PIC_REFER1_NBR(dpb[i + 1].frame_num); - + reg = G1_REG_REF_PIC_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, i)) | + G1_REG_REF_PIC_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, i + 1)); vdpu_write_relaxed(vpu, reg, G1_REG_REF_PIC(i / 2)); } diff --git a/drivers/staging/media/hantro/hantro_h264.c b/drivers/staging/media/hantro/hantro_h264.c index 6d72136760e7..0b4d2491be3b 100644 --- a/drivers/staging/media/hantro/hantro_h264.c +++ b/drivers/staging/media/hantro/hantro_h264.c @@ -348,6 +348,17 @@ dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, return dma_addr; } +u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx) +{ + const struct v4l2_h264_dpb_entry *dpb = &ctx->h264_dec.dpb[dpb_idx]; + + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + return 0; + if (dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + return dpb->pic_num; + return dpb->frame_num; +} + int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx) { struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec; diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index ce678fedaad6..7a8048afe357 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -238,6 +238,8 @@ void hantro_jpeg_enc_done(struct hantro_ctx *ctx); dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, unsigned int dpb_idx); +u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, + unsigned int dpb_idx); int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx); int hantro_g1_h264_dec_run(struct hantro_ctx *ctx); int hantro_h264_dec_init(struct 
hantro_ctx *ctx); -- 2.30.0
[PATCH 10/12] dt-bindings: media: rockchip-vpu: Add PX30 compatible
From: Paul Kocialkowski The Rockchip PX30 SoC has a Hantro VPU that features a decoder (VDPU2) and an encoder (VEPU2). Signed-off-by: Paul Kocialkowski Signed-off-by: Ezequiel Garcia --- Documentation/devicetree/bindings/media/rockchip-vpu.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml index b88172a59de7..3b9c5aa91fcc 100644 --- a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml +++ b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml @@ -28,6 +28,9 @@ properties: - items: - const: rockchip,rk3228-vpu - const: rockchip,rk3399-vpu + - items: + - const: rockchip,px30-vpu + - const: rockchip,rk3399-vpu reg: maxItems: 1 -- 2.30.0
Re: [PATCH 2/6] KVM: mmu: also return page from gfn_to_pfn
Excerpts from Nicholas Piggin's message of June 24, 2021 7:57 pm: > Excerpts from Paolo Bonzini's message of June 24, 2021 7:42 pm: >> On 24/06/21 10:52, Nicholas Piggin wrote: For now, wrap all calls to gfn_to_pfn functions in the new helper function. Callers which don't need the page struct will be updated in follow-up patches. >>> Hmm. You mean callers that do need the page will be updated? Normally >>> if there will be leftover users that don't need the struct page then >>> you would go the other way and keep the old call the same, and add a new >>> one (gfn_to_pfn_page) just for those that need it. >> >> Needing kvm_pfn_page_unwrap is a sign that something might be buggy, so >> it's a good idea to move the short name to the common case and the ugly >> kvm_pfn_page_unwrap(gfn_to_pfn(...)) for the weird one. In fact I'm not >> sure there should be any kvm_pfn_page_unwrap in the end. > > If all callers were updated that is one thing, but from the changelog > it sounds like that would not happen and there would be some gfn_to_pfn > users left over. > > But yes in the end you would either need to make gfn_to_pfn never return > a page found via follow_pte, or change all callers to the new way. If > the plan is for the latter then I guess that's fine. Actually in that case anyway I don't see the need -- the existence of gfn_to_pfn is enough to know it might be buggy. It can just as easily be grepped for as kvm_pfn_page_unwrap. And are gfn_to_page cases also vulnerable to the same issue? So I think it could be marked deprecated or something if not everything will be converted in the one series, and don't need to touch all that arch code with this patch. Thanks, Nick
[PATCH 11/12] arm64: dts: rockchip: Add VPU support for the PX30
From: Paul Kocialkowski The PX30 has a VPU (both decoder and encoder) with a dedicated IOMMU. Describe these two entities in device-tree. Signed-off-by: Paul Kocialkowski Signed-off-by: Ezequiel Garcia --- arch/arm64/boot/dts/rockchip/px30.dtsi | 23 +++ 1 file changed, 23 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index 09baa8a167ce..892eb074775b 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -1016,6 +1016,29 @@ gpu: gpu@ff40 { status = "disabled"; }; + vpu: video-codec@ff442000 { + compatible = "rockchip,px30-vpu", "rockchip,rk3399-vpu"; + reg = <0x0 0xff442000 0x0 0x800>; + interrupts = , +; + interrupt-names = "vepu", "vdpu"; + clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; + clock-names = "aclk", "hclk"; + iommus = <&vpu_mmu>; + power-domains = <&power PX30_PD_VPU>; + }; + + vpu_mmu: iommu@ff442800 { + compatible = "rockchip,iommu"; + reg = <0x0 0xff442800 0x0 0x100>; + interrupts = ; + interrupt-names = "vpu_mmu"; + clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; + clock-names = "aclk", "iface"; + #iommu-cells = <0>; + power-domains = <&power PX30_PD_VPU>; + }; + dsi: dsi@ff45 { compatible = "rockchip,px30-mipi-dsi"; reg = <0x0 0xff45 0x0 0x1>; -- 2.30.0
[PATCH 02/12] arm64: dts: rockchip: Add panel orientation to Odroid Go Advance
The Odroid Go Advance panel is rotated, so let's reflect this in the device tree. Signed-off-by: Ezequiel Garcia --- arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts index 49c97f76df77..cca19660e60a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts @@ -240,6 +240,7 @@ panel@0 { iovcc-supply = <&vcc_lcd>; reset-gpios = <&gpio3 RK_PC0 GPIO_ACTIVE_LOW>; vdd-supply = <&vcc_lcd>; + rotation = <270>; port { mipi_in_panel: endpoint { -- 2.30.0
Re: [PATCH 1/6] KVM: x86/mmu: release audited pfns
Excerpts from David Stevens's message of June 24, 2021 1:57 pm: > From: David Stevens Changelog? This looks like a bug, should it have a Fixes: tag? Thanks, Nick > > Signed-off-by: David Stevens > --- > arch/x86/kvm/mmu/mmu_audit.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/arch/x86/kvm/mmu/mmu_audit.c b/arch/x86/kvm/mmu/mmu_audit.c > index cedc17b2f60e..97ff184084b4 100644 > --- a/arch/x86/kvm/mmu/mmu_audit.c > +++ b/arch/x86/kvm/mmu/mmu_audit.c > @@ -121,6 +121,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 > *sptep, int level) > audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx " >"ent %llxn", vcpu->arch.mmu->root_level, pfn, >hpa, *sptep); > + > + kvm_release_pfn_clean(pfn); > } > > static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) > -- > 2.32.0.93.g670b81a890-goog > >
Re: [PATCH 02/12] arm64: dts: rockchip: Add panel orientation to Odroid Go Advance
On Thu, 2021-06-24 at 20:37 +0200, Heiko Stübner wrote: > Am Donnerstag, 24. Juni 2021, 20:26:02 CEST schrieb Ezequiel Garcia: > > The Odroid Go Advance panel is rotated, so let's reflect this > > in the device tree. > > > > Signed-off-by: Ezequiel Garcia > > similar patch already applied for 5.14: > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts?id=edb39de5d731f147c7b08c4a5eb246ae1dbdd947 Great minds think alike! :) Thanks, Ezequiel
[PATCH 05/12] media: hantro: Avoid redundant hantro_get_{dst, src}_buf() calls
Getting the next src/dst buffer is relatively expensive so avoid doing it multiple times. Signed-off-by: Ezequiel Garcia --- .../staging/media/hantro/hantro_g1_h264_dec.c | 17 - .../staging/media/hantro/hantro_g1_vp8_dec.c | 18 +- .../media/hantro/rockchip_vpu2_hw_vp8_dec.c | 19 +-- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c index 5c792b7bcb79..2aa37baad0c3 100644 --- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c @@ -19,13 +19,12 @@ #include "hantro_hw.h" #include "hantro_v4l2.h" -static void set_params(struct hantro_ctx *ctx) +static void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) { const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; const struct v4l2_ctrl_h264_sps *sps = ctrls->sps; const struct v4l2_ctrl_h264_pps *pps = ctrls->pps; - struct vb2_v4l2_buffer *src_buf = hantro_get_src_buf(ctx); struct hantro_dev *vpu = ctx->dev; u32 reg; @@ -226,22 +225,20 @@ static void set_ref(struct hantro_ctx *ctx) } } -static void set_buffers(struct hantro_ctx *ctx) +static void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) { const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; - struct vb2_v4l2_buffer *src_buf, *dst_buf; + struct vb2_v4l2_buffer *dst_buf; struct hantro_dev *vpu = ctx->dev; dma_addr_t src_dma, dst_dma; size_t offset = 0; - src_buf = hantro_get_src_buf(ctx); - dst_buf = hantro_get_dst_buf(ctx); - /* Source (stream) buffer. */ src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); vdpu_write_relaxed(vpu, src_dma, G1_REG_ADDR_STR); /* Destination (decoded frame) buffer. 
*/ + dst_buf = hantro_get_dst_buf(ctx); dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf); /* Adjust dma addr to start at second line for bottom field */ if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) @@ -276,6 +273,7 @@ static void set_buffers(struct hantro_ctx *ctx) int hantro_g1_h264_dec_run(struct hantro_ctx *ctx) { struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *src_buf; int ret; /* Prepare the H264 decoder context. */ @@ -284,9 +282,10 @@ int hantro_g1_h264_dec_run(struct hantro_ctx *ctx) return ret; /* Configure hardware registers. */ - set_params(ctx); + src_buf = hantro_get_src_buf(ctx); + set_params(ctx, src_buf); set_ref(ctx); - set_buffers(ctx); + set_buffers(ctx, src_buf); hantro_end_prepare_run(ctx); diff --git a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c index 2afd5996d75f..6180b23e7d94 100644 --- a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c @@ -367,13 +367,12 @@ static void cfg_tap(struct hantro_ctx *ctx, } static void cfg_ref(struct hantro_ctx *ctx, - const struct v4l2_ctrl_vp8_frame *hdr) + const struct v4l2_ctrl_vp8_frame *hdr, + struct vb2_v4l2_buffer *vb2_dst) { struct hantro_dev *vpu = ctx->dev; - struct vb2_v4l2_buffer *vb2_dst; dma_addr_t ref; - vb2_dst = hantro_get_dst_buf(ctx); ref = hantro_get_ref(ctx, hdr->last_frame_ts); if (!ref) { @@ -405,16 +404,14 @@ static void cfg_ref(struct hantro_ctx *ctx, } static void cfg_buffers(struct hantro_ctx *ctx, - const struct v4l2_ctrl_vp8_frame *hdr) + const struct v4l2_ctrl_vp8_frame *hdr, + struct vb2_v4l2_buffer *vb2_dst) { const struct v4l2_vp8_segment *seg = &hdr->segment; struct hantro_dev *vpu = ctx->dev; - struct vb2_v4l2_buffer *vb2_dst; dma_addr_t dst_dma; u32 reg; - vb2_dst = hantro_get_dst_buf(ctx); - /* Set probability table buffer address */ vdpu_write_relaxed(vpu, ctx->vp8_dec.prob_tbl.dma, G1_REG_ADDR_QTABLE); @@ -436,6 +433,7 
@@ int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx) { const struct v4l2_ctrl_vp8_frame *hdr; struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *vb2_dst; size_t height = ctx->dst_fmt.height; size_t width = ctx->dst_fmt.width; u32 mb_width, mb_height; @@ -499,8 +497,10 @@ int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx) cfg_qp(ctx, hdr); cfg_parts(ctx, hdr); cfg_tap(ctx, hdr); - cfg_ref(ctx, hdr); - cfg_buffers(ctx, hdr); + + vb
[PATCH 12/12] media: hantro: Add support for the Rockchip PX30
From: Paul Kocialkowski The PX30 SoC includes both the VDPU2 and VEPU2 blocks which are similar to the RK3399 (Hantro G1/H1 with shuffled registers). Signed-off-by: Paul Kocialkowski Signed-off-by: Ezequiel Garcia --- drivers/staging/media/hantro/hantro_drv.c | 1 + drivers/staging/media/hantro/hantro_hw.h | 1 + .../staging/media/hantro/rockchip_vpu_hw.c| 28 +++ 3 files changed, 30 insertions(+) diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c index 9b5415176bfe..8a2edd67f2c6 100644 --- a/drivers/staging/media/hantro/hantro_drv.c +++ b/drivers/staging/media/hantro/hantro_drv.c @@ -582,6 +582,7 @@ static const struct v4l2_file_operations hantro_fops = { static const struct of_device_id of_hantro_match[] = { #ifdef CONFIG_VIDEO_HANTRO_ROCKCHIP + { .compatible = "rockchip,px30-vpu", .data = &px30_vpu_variant, }, { .compatible = "rockchip,rk3036-vpu", .data = &rk3036_vpu_variant, }, { .compatible = "rockchip,rk3066-vpu", .data = &rk3066_vpu_variant, }, { .compatible = "rockchip,rk3288-vpu", .data = &rk3288_vpu_variant, }, diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index 9296624654a6..df7b5e3a57b9 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -209,6 +209,7 @@ enum hantro_enc_fmt { extern const struct hantro_variant imx8mq_vpu_g2_variant; extern const struct hantro_variant imx8mq_vpu_variant; +extern const struct hantro_variant px30_vpu_variant; extern const struct hantro_variant rk3036_vpu_variant; extern const struct hantro_variant rk3066_vpu_variant; extern const struct hantro_variant rk3288_vpu_variant; diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c index e4e3b5e7689b..e7f56e30b4a8 100644 --- a/drivers/staging/media/hantro/rockchip_vpu_hw.c +++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c @@ -16,6 +16,7 @@ #define RK3066_ACLK_MAX_FREQ (300 * 
1000 * 1000) #define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000) +#define PX30_ACLK_MAX_FREQ (300 * 1000 * 1000) /* * Supported formats. @@ -279,6 +280,12 @@ static int rockchip_vpu_hw_init(struct hantro_dev *vpu) return 0; } +static int px30_vpu_hw_init(struct hantro_dev *vpu) +{ + clk_set_rate(vpu->clocks[0].clk, PX30_ACLK_MAX_FREQ); + return 0; +} + static void rk3066_vpu_dec_reset(struct hantro_ctx *ctx) { struct hantro_dev *vpu = ctx->dev; @@ -452,6 +459,10 @@ static const char * const rockchip_vpu_clk_names[] = { "aclk", "hclk" }; +static const char * const px30_clk_names[] = { + "aclk", "hclk" +}; + /* VDPU1/VEPU1 */ const struct hantro_variant rk3036_vpu_variant = { @@ -548,3 +559,20 @@ const struct hantro_variant rk3399_vpu_variant = { .clk_names = rockchip_vpu_clk_names, .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) }; + +const struct hantro_variant px30_vpu_variant = { + .enc_offset = 0x0, + .enc_fmts = rockchip_vpu_enc_fmts, + .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts), + .dec_offset = 0x400, + .dec_fmts = rk3399_vpu_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts), + .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER | +HANTRO_VP8_DECODER | HANTRO_H264_DECODER, + .codec_ops = rk3399_vpu_codec_ops, + .irqs = rockchip_vpu2_irqs, + .num_irqs = ARRAY_SIZE(rockchip_vpu2_irqs), + .init = px30_vpu_hw_init, + .clk_names = px30_clk_names, + .num_clocks = ARRAY_SIZE(px30_clk_names) +}; -- 2.30.0
Re: [PATCH 2/6] KVM: mmu: also return page from gfn_to_pfn
Excerpts from David Stevens's message of June 24, 2021 1:57 pm: > From: David Stevens > > Return a struct kvm_pfn_page containing both a pfn and an optional > struct page from the gfn_to_pfn family of functions. This differentiates > the gup and follow_fault_pfn cases, which allows callers that only need > a pfn to avoid touching the page struct in the latter case. For callers > that need a struct page, introduce a helper function that unwraps a > struct kvm_pfn_page into a struct page. This helper makes the call to > kvm_get_pfn which had previously been in hva_to_pfn_remapped. > > For now, wrap all calls to gfn_to_pfn functions in the new helper > function. Callers which don't need the page struct will be updated in > follow-up patches. Hmm. You mean callers that do need the page will be updated? Normally if there will be leftover users that don't need the struct page then you would go the other way and keep the old call the same, and add a new one (gfn_to_pfn_page) just for those that need it. Most kernel code I look at passes back multiple values by updating pointers to struct or variables rather than returning a struct, I suppose that's not really a big deal and a matter of taste. Thanks, Nick
[PATCH 06/12] media: hantro: h264: Move DPB valid and long-term bitmaps
In order to reuse these bitmaps, move this process to struct hantro_h264_dec_hw_ctx. This will be used by the Rockchip VDPU2 H.264 driver. Signed-off-by: Ezequiel Garcia --- .../staging/media/hantro/hantro_g1_h264_dec.c | 17 ++--- drivers/staging/media/hantro/hantro_h264.c | 13 + drivers/staging/media/hantro/hantro_hw.h| 4 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c index 2aa37baad0c3..6faacfc44c7c 100644 --- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c @@ -129,25 +129,12 @@ static void set_ref(struct hantro_ctx *ctx) struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; const u8 *b0_reflist, *b1_reflist, *p_reflist; struct hantro_dev *vpu = ctx->dev; - u32 dpb_longterm = 0; - u32 dpb_valid = 0; int reg_num; u32 reg; int i; - /* -* Set up bit maps of valid and long term DPBs. -* NOTE: The bits are reversed, i.e. MSb is DPB 0. -*/ - for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) { - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) - dpb_valid |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); - - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - dpb_longterm |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); - } - vdpu_write_relaxed(vpu, dpb_valid << 16, G1_REG_VALID_REF); - vdpu_write_relaxed(vpu, dpb_longterm << 16, G1_REG_LT_REF); + vdpu_write_relaxed(vpu, ctx->h264_dec.dpb_valid, G1_REG_VALID_REF); + vdpu_write_relaxed(vpu, ctx->h264_dec.dpb_longterm, G1_REG_LT_REF); /* * Set up reference frame picture numbers. 
diff --git a/drivers/staging/media/hantro/hantro_h264.c b/drivers/staging/media/hantro/hantro_h264.c index ed6eaf11d96f..6d72136760e7 100644 --- a/drivers/staging/media/hantro/hantro_h264.c +++ b/drivers/staging/media/hantro/hantro_h264.c @@ -229,12 +229,25 @@ static void prepare_table(struct hantro_ctx *ctx) const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu; const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; + u32 dpb_longterm = 0; + u32 dpb_valid = 0; int i; for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) { tbl->poc[i * 2] = dpb[i].top_field_order_cnt; tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt; + + /* +* Set up bit maps of valid and long term DPBs. +* NOTE: The bits are reversed, i.e. MSb is DPB 0. +*/ + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) + dpb_valid |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + dpb_longterm |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); } + ctx->h264_dec.dpb_valid = dpb_valid << 16; + ctx->h264_dec.dpb_longterm = dpb_longterm << 16; tbl->poc[32] = dec_param->top_field_order_cnt; tbl->poc[33] = dec_param->bottom_field_order_cnt; diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index 5dcf65805396..ce678fedaad6 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -89,12 +89,16 @@ struct hantro_h264_dec_reflists { * @dpb: DPB * @reflists: P/B0/B1 reflists * @ctrls: V4L2 controls attached to a run + * @dpb_longterm: DPB long-term + * @dpb_valid: DPB valid */ struct hantro_h264_dec_hw_ctx { struct hantro_aux_buf priv; struct v4l2_h264_dpb_entry dpb[HANTRO_H264_DPB_SIZE]; struct hantro_h264_dec_reflists reflists; struct hantro_h264_dec_ctrls ctrls; + u32 dpb_longterm; + u32 dpb_valid; }; /** -- 2.30.0
Re: [PATCH 0/6] KVM: Remove uses of struct page from x86 and arm64 MMU
Excerpts from David Stevens's message of June 24, 2021 1:57 pm: > KVM supports mapping VM_IO and VM_PFNMAP memory into the guest by using > follow_pte in gfn_to_pfn. However, the resolved pfns may not have > assoicated struct pages, so they should not be passed to pfn_to_page. > This series removes such calls from the x86 and arm64 secondary MMU. To > do this, this series modifies gfn_to_pfn to return a struct page in > addition to a pfn, if the hva was resolved by gup. This allows the > caller to call put_page only when necessated by gup. > > This series provides a helper function that unwraps the new return type > of gfn_to_pfn to provide behavior identical to the old behavior. As I > have no hardware to test powerpc/mips changes, the function is used > there for minimally invasive changes. Additionally, as gfn_to_page and > gfn_to_pfn_cache are not integrated with mmu notifier, they cannot be > easily changed over to only use pfns. > > This addresses CVE-2021-22543 on x86 and arm64. Does this fix the problem? (untested I don't have a POC setup at hand, but at least in concept) I have no problem with improving the API and probably in the direction of your series is good. But there seems to be a lot of unfixed arch code and broken APIs remaining left to do after your series too. This might be most suitable to backport and as a base for your series that can take more time to convert to new APIs. Thanks, Nick --- diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a6bc7af0e28..e208c279d903 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2104,13 +2104,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * Whoever called remap_pfn_range is also going to call e.g. * unmap_mapping_range before the underlying pages are freed, * causing a call to our MMU notifier. 
+* +* Certain IO or PFNMAP mappings can be backed with valid +* struct pages, but be allocated without refcounting e.g., +* tail pages of non-compound higher order allocations, which +* would then underflow the refcount when the caller does the +* required put_page. Don't allow those pages here. */ - kvm_get_pfn(pfn); + if (!kvm_try_get_pfn(pfn)) + r = -EFAULT; out: pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; - return 0; + + return r; } /* @@ -2487,6 +2495,13 @@ void kvm_set_pfn_accessed(kvm_pfn_t pfn) } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); +static int kvm_try_get_pfn(kvm_pfn_t pfn) +{ + if (kvm_is_reserved_pfn(pfn)) + return 1; + return get_page_unless_zero(pfn_to_page(pfn)); +} + void kvm_get_pfn(kvm_pfn_t pfn) { if (!kvm_is_reserved_pfn(pfn))
Re: [PATCH 0/6] KVM: Remove uses of struct page from x86 and arm64 MMU
Excerpts from Nicholas Piggin's message of June 24, 2021 8:34 pm: > Excerpts from David Stevens's message of June 24, 2021 1:57 pm: >> KVM supports mapping VM_IO and VM_PFNMAP memory into the guest by using >> follow_pte in gfn_to_pfn. However, the resolved pfns may not have >> assoicated struct pages, so they should not be passed to pfn_to_page. >> This series removes such calls from the x86 and arm64 secondary MMU. To >> do this, this series modifies gfn_to_pfn to return a struct page in >> addition to a pfn, if the hva was resolved by gup. This allows the >> caller to call put_page only when necessated by gup. >> >> This series provides a helper function that unwraps the new return type >> of gfn_to_pfn to provide behavior identical to the old behavior. As I >> have no hardware to test powerpc/mips changes, the function is used >> there for minimally invasive changes. Additionally, as gfn_to_page and >> gfn_to_pfn_cache are not integrated with mmu notifier, they cannot be >> easily changed over to only use pfns. >> >> This addresses CVE-2021-22543 on x86 and arm64. > > Does this fix the problem? (untested I don't have a POC setup at hand, > but at least in concept) This one actually compiles at least. Unfortunately I don't have much time in the near future to test, and I only just found out about this CVE a few hours ago. --- It's possible to create a region which maps valid but non-refcounted pages (e.g., tail pages of non-compound higher order allocations). These host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family of APIs, which take a reference to the page, which takes it from 0 to 1. When the reference is dropped, this will free the page incorrectly. Fix this by only taking a reference on the page if it was non-zero, which indicates it is participating in normal refcounting (and can be released with put_page). 
--- virt/kvm/kvm_main.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a6bc7af0e28..46fb042837d2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2055,6 +2055,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) return true; } +static int kvm_try_get_pfn(kvm_pfn_t pfn) +{ + if (kvm_is_reserved_pfn(pfn)) + return 1; + return get_page_unless_zero(pfn_to_page(pfn)); +} + static int hva_to_pfn_remapped(struct vm_area_struct *vma, unsigned long addr, bool *async, bool write_fault, bool *writable, @@ -2104,13 +2111,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * Whoever called remap_pfn_range is also going to call e.g. * unmap_mapping_range before the underlying pages are freed, * causing a call to our MMU notifier. +* +* Certain IO or PFNMAP mappings can be backed with valid +* struct pages, but be allocated without refcounting e.g., +* tail pages of non-compound higher order allocations, which +* would then underflow the refcount when the caller does the +* required put_page. Don't allow those pages here. */ - kvm_get_pfn(pfn); + if (!kvm_try_get_pfn(pfn)) + r = -EFAULT; out: pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; - return 0; + + return r; } /* -- 2.23.0
[PATCH 03/12] hantro: vp8: Move noisy WARN_ON to vpu_debug
When the VP8 decoders can't find a reference frame, the driver falls back to the current output frame. This will probably produce some undesirable results, leading to frame corruption, but shouldn't cause noisy warnings. Signed-off-by: Ezequiel Garcia Acked-by: Nicolas Dufresne --- drivers/staging/media/hantro/hantro_g1_vp8_dec.c| 13 ++--- .../staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c | 13 ++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c index 96622a7f8279..2afd5996d75f 100644 --- a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c @@ -376,12 +376,17 @@ static void cfg_ref(struct hantro_ctx *ctx, vb2_dst = hantro_get_dst_buf(ctx); ref = hantro_get_ref(ctx, hdr->last_frame_ts); - if (!ref) + if (!ref) { + vpu_debug(0, "failed to find last frame ts=%llu\n", + hdr->last_frame_ts); ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + } vdpu_write_relaxed(vpu, ref, G1_REG_ADDR_REF(0)); ref = hantro_get_ref(ctx, hdr->golden_frame_ts); - WARN_ON(!ref && hdr->golden_frame_ts); + if (!ref && hdr->golden_frame_ts) + vpu_debug(0, "failed to find golden frame ts=%llu\n", + hdr->golden_frame_ts); if (!ref) ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN) @@ -389,7 +394,9 @@ static void cfg_ref(struct hantro_ctx *ctx, vdpu_write_relaxed(vpu, ref, G1_REG_ADDR_REF(4)); ref = hantro_get_ref(ctx, hdr->alt_frame_ts); - WARN_ON(!ref && hdr->alt_frame_ts); + if (!ref && hdr->alt_frame_ts) + vpu_debug(0, "failed to find alt frame ts=%llu\n", + hdr->alt_frame_ts); if (!ref) ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT) diff --git a/drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c b/drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c index 951b55f58a61..704607511b57 
100644 --- a/drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c +++ b/drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c @@ -453,12 +453,17 @@ static void cfg_ref(struct hantro_ctx *ctx, vb2_dst = hantro_get_dst_buf(ctx); ref = hantro_get_ref(ctx, hdr->last_frame_ts); - if (!ref) + if (!ref) { + vpu_debug(0, "failed to find last frame ts=%llu\n", + hdr->last_frame_ts); ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + } vdpu_write_relaxed(vpu, ref, VDPU_REG_VP8_ADDR_REF0); ref = hantro_get_ref(ctx, hdr->golden_frame_ts); - WARN_ON(!ref && hdr->golden_frame_ts); + if (!ref && hdr->golden_frame_ts) + vpu_debug(0, "failed to find golden frame ts=%llu\n", + hdr->golden_frame_ts); if (!ref) ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN) @@ -466,7 +471,9 @@ static void cfg_ref(struct hantro_ctx *ctx, vdpu_write_relaxed(vpu, ref, VDPU_REG_VP8_ADDR_REF2_5(2)); ref = hantro_get_ref(ctx, hdr->alt_frame_ts); - WARN_ON(!ref && hdr->alt_frame_ts); + if (!ref && hdr->alt_frame_ts) + vpu_debug(0, "failed to find alt frame ts=%llu\n", + hdr->alt_frame_ts); if (!ref) ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT) -- 2.30.0
[PATCH 01/12] drm/panel: kd35t133: Add panel orientation support
Parse the device tree rotation specifier, and set a DRM connector orientation property. The property can then be read by compositors to apply hardware plane rotation or a GPU transform. Signed-off-by: Ezequiel Garcia --- drivers/gpu/drm/panel/panel-elida-kd35t133.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c b/drivers/gpu/drm/panel/panel-elida-kd35t133.c index fe5ac3ef9018..5987d28c874c 100644 --- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c +++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c @@ -39,6 +39,7 @@ struct kd35t133 { struct device *dev; struct drm_panel panel; + enum drm_panel_orientation orientation; struct gpio_desc *reset_gpio; struct regulator *vdd; struct regulator *iovcc; @@ -216,6 +217,7 @@ static int kd35t133_get_modes(struct drm_panel *panel, connector->display_info.width_mm = mode->width_mm; connector->display_info.height_mm = mode->height_mm; drm_mode_probed_add(connector, mode); + drm_connector_set_panel_orientation(connector, ctx->orientation); return 1; } @@ -258,6 +260,12 @@ static int kd35t133_probe(struct mipi_dsi_device *dsi) return ret; } + ret = of_drm_get_panel_orientation(dev->of_node, &ctx->orientation); + if (ret) { + dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, ret); + return ret; + } + mipi_dsi_set_drvdata(dsi, ctx); ctx->dev = dev; -- 2.30.0
[PATCH 00/12] hantro: Enable H.264 VDPU2 (Odroid Advance Go)
This series adds support for H.264 decoding on the PX30, RK3328 and RK3326 platforms, enabling the VDPU2 core. Given it's tested on the Odroid Advance Go, patches 1 and 2 add the basic support to report the panel orientation to userspace (Heiko, if you like them, feel free to pick them). Weston (for instance) picks up the orientation automagically and rotates the render. Patches 3 and 4 are just low-hanging fruit that was on my backlog. Patches 5, 6 and 7 add some helpers to avoid duplicating some processes between Hantro G1 and VDPU2. Patches 8 and 9 enable the VDPU2 H.264. The implementation is based on a patch from Jonas Karlman [1], which I forward-ported to mainline. Finally, patches 10 to 12 add support for the VPU on Rockchip PX30 SoCs. These patches are based on patches submitted by Paul Kocialkowski [2], which I ported and adjusted a bit. Tested on i.MX8MQ EVK and RK3326 Odroid Advance Go, the latter is able to decode a 1080p sample at ~100fps nicely. Fluster conformance testing is looking good as well, and producing expected results: RK3326: Ran 135 tests in 480.067s FAILED (failures=9, errors=54) i.MX8MQ: Ran 135 tests in 337.491s FAILED (failures=9, errors=54) [1] https://lore.kernel.org/linux-media/he1pr06mb40119de07d38060f531d1070ac...@he1pr06mb4011.eurprd06.prod.outlook.com/ [2] https://lore.kernel.org/patchwork/cover/1361795/ Ezequiel Garcia (8): drm/panel: kd35t133: Add panel orientation support arm64: dts: rockchip: Add panel orientation to Odroid Go Advance hantro: vp8: Move noisy WARN_ON to vpu_debug hantro: Make struct hantro_variant.init() optional media: hantro: Avoid redundant hantro_get_{dst,src}_buf() calls media: hantro: h264: Move DPB valid and long-term bitmaps media: hantro: h264: Move reference picture number to a helper media: hantro: Enable H.264 on Rockchip VDPU2 Jonas Karlman (1): media: hantro: Add H.264 support for Rockchip VDPU2 Paul Kocialkowski (3): dt-bindings: media: rockchip-vpu: Add PX30 compatible arm64: dts: rockchip: 
Add VPU support for the PX30 media: hantro: Add support for the Rockchip PX30 .../bindings/media/rockchip-vpu.yaml | 3 + arch/arm64/boot/dts/rockchip/px30.dtsi| 23 + .../boot/dts/rockchip/rk3326-odroid-go2.dts | 1 + drivers/gpu/drm/panel/panel-elida-kd35t133.c | 8 + drivers/staging/media/hantro/Makefile | 1 + drivers/staging/media/hantro/hantro.h | 4 +- drivers/staging/media/hantro/hantro_drv.c | 11 +- .../staging/media/hantro/hantro_g1_h264_dec.c | 48 +- .../staging/media/hantro/hantro_g1_vp8_dec.c | 31 +- drivers/staging/media/hantro/hantro_h264.c| 24 + drivers/staging/media/hantro/hantro_hw.h | 8 + .../media/hantro/rockchip_vpu2_hw_h264_dec.c | 491 ++ .../media/hantro/rockchip_vpu2_hw_vp8_dec.c | 32 +- .../staging/media/hantro/rockchip_vpu_hw.c| 54 +- .../staging/media/hantro/sama5d4_vdec_hw.c| 6 - 15 files changed, 671 insertions(+), 74 deletions(-) create mode 100644 drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c -- 2.30.0
Re: [PATCH 3/6] KVM: x86/mmu: avoid struct page in MMU
Excerpts from Marc Zyngier's message of June 24, 2021 8:06 pm: > On Thu, 24 Jun 2021 09:58:00 +0100, > Nicholas Piggin wrote: >> >> Excerpts from David Stevens's message of June 24, 2021 1:57 pm: >> > From: David Stevens >> > out_unlock: >> >if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) >> >read_unlock(&vcpu->kvm->mmu_lock); >> >else >> >write_unlock(&vcpu->kvm->mmu_lock); >> > - kvm_release_pfn_clean(pfn); >> > + if (pfnpg.page) >> > + put_page(pfnpg.page); >> >return r; >> > } >> >> How about >> >> kvm_release_pfn_page_clean(pfnpg); > > I'm not sure. I always found kvm_release_pfn_clean() ugly, because it > doesn't mark the page 'clean'. I find put_page() more correct. > > Something like 'kvm_put_pfn_page()' would make more sense, but I'm so > bad at naming things that I could just as well call it 'bob()'. That seems like a fine name to me. A little better than bob. Thanks, Nick
Re: [PATCH 02/12] arm64: dts: rockchip: Add panel orientation to Odroid Go Advance
Am Donnerstag, 24. Juni 2021, 20:26:02 CEST schrieb Ezequiel Garcia: > The Odroid Go Advance panel is rotated, so let's reflect this > in the device tree. > > Signed-off-by: Ezequiel Garcia similar patch already applied for 5.14: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts?id=edb39de5d731f147c7b08c4a5eb246ae1dbdd947 > --- > arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts > b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts > index 49c97f76df77..cca19660e60a 100644 > --- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts > +++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts > @@ -240,6 +240,7 @@ panel@0 { > iovcc-supply = <&vcc_lcd>; > reset-gpios = <&gpio3 RK_PC0 GPIO_ACTIVE_LOW>; > vdd-supply = <&vcc_lcd>; > + rotation = <270>; > > port { > mipi_in_panel: endpoint { >
Re: [PATCH 01/12] drm/panel: kd35t133: Add panel orientation support
Am Donnerstag, 24. Juni 2021, 20:26:01 CEST schrieb Ezequiel Garcia: > Parse the device tree rotation specifier, and set a DRM > connector orientation property. The property can then be read > by compositors to apply hardware plane rotation or a GPU transform. > > Signed-off-by: Ezequiel Garcia similar patch already applied for 5.14: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/drivers/gpu/drm/panel/panel-elida-kd35t133.c?id=610d9c311b1387f8c4ac602fee1f2a1cb0508707 > --- > drivers/gpu/drm/panel/panel-elida-kd35t133.c | 8 > 1 file changed, 8 insertions(+) > > diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c > b/drivers/gpu/drm/panel/panel-elida-kd35t133.c > index fe5ac3ef9018..5987d28c874c 100644 > --- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c > +++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c > @@ -39,6 +39,7 @@ > struct kd35t133 { > struct device *dev; > struct drm_panel panel; > + enum drm_panel_orientation orientation; > struct gpio_desc *reset_gpio; > struct regulator *vdd; > struct regulator *iovcc; > @@ -216,6 +217,7 @@ static int kd35t133_get_modes(struct drm_panel *panel, > connector->display_info.width_mm = mode->width_mm; > connector->display_info.height_mm = mode->height_mm; > drm_mode_probed_add(connector, mode); > + drm_connector_set_panel_orientation(connector, ctx->orientation); > > return 1; > } > @@ -258,6 +260,12 @@ static int kd35t133_probe(struct mipi_dsi_device *dsi) > return ret; > } > > + ret = of_drm_get_panel_orientation(dev->of_node, &ctx->orientation); > + if (ret) { > + dev_err(dev, "%pOF: failed to get orientation %d\n", > dev->of_node, ret); > + return ret; > + } > + > mipi_dsi_set_drvdata(dsi, ctx); > > ctx->dev = dev; >
[PATCH 4/4] drm/i915/gem: Migrate to system at dma-buf map time
Until we support p2p dma or as a complement to that, migrate data to system memory at dma-buf map time if possible. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 616c3a2f1baf..a52f885bc09a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -25,7 +25,14 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme struct scatterlist *src, *dst; int ret, i; - ret = i915_gem_object_pin_pages_unlocked(obj); + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + return ERR_PTR(ret); + + ret = i915_gem_object_migrate(obj, NULL, INTEL_REGION_SMEM); + if (!ret) + ret = i915_gem_object_pin_pages(obj); + i915_gem_object_unlock(obj); if (ret) goto err; -- 2.31.1
[PATCH 3/4] drm/i915/display: Migrate objects to LMEM if possible for display
Objects intended to be used as display framebuffers must reside in LMEM for discrete. If they happen to not do that, migrate them to LMEM before pinning. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/display/intel_display.c | 5 - drivers/gpu/drm/i915/gem/i915_gem_domain.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 21 drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 -- 4 files changed, 5 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4524dbfa5e42..83a4aba54d67 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1331,6 +1331,9 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, ret = i915_gem_object_lock(obj, &ww); if (!ret && phys_cursor) ret = i915_gem_object_attach_phys(obj, alignment); + else if (!ret && HAS_LMEM(dev_priv)) + ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM); + /* TODO: Do we need to sync when migration becomes async? 
*/ if (!ret) ret = i915_gem_object_pin_pages(obj); if (ret) @@ -11770,7 +11773,7 @@ intel_user_framebuffer_create(struct drm_device *dev, /* object is backed with LMEM for discrete */ i915 = to_i915(obj->base.dev); - if (HAS_LMEM(i915) && !i915_gem_object_validates_to_lmem(obj)) { + if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) { /* object is "remote", not in local memory */ i915_gem_object_put(obj); return ERR_PTR(-EREMOTE); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 073822100da7..7d1400b13429 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -375,7 +375,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - /* Frame buffer must be in LMEM (no migration yet) */ + /* Frame buffer must be in LMEM */ if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 41d5182cd367..be1d122574af 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -23,27 +23,6 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, return io_mapping_map_wc(&obj->mm.region->iomap, offset, size); } -/** - * i915_gem_object_validates_to_lmem - Whether the object is resident in - * lmem when pages are present. - * @obj: The object to check. - * - * Migratable objects residency may change from under us if the object is - * not pinned or locked. This function is intended to be used to check whether - * the object can only reside in lmem when pages are present. - * - * Return: Whether the object is always resident in lmem when pages are - * present. 
- */ -bool i915_gem_object_validates_to_lmem(struct drm_i915_gem_object *obj) -{ - struct intel_memory_region *mr = READ_ONCE(obj->mm.region); - - return !i915_gem_object_migratable(obj) && - mr && (mr->type == INTEL_MEMORY_LOCAL || - mr->type == INTEL_MEMORY_STOLEN_LOCAL); -} - /** * i915_gem_object_is_lmem - Whether the object is resident in * lmem diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 8cbd7a5334e2..d423d8cac4f2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -597,8 +597,6 @@ bool i915_gem_object_evictable(struct drm_i915_gem_object *obj); bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); -bool i915_gem_object_validates_to_lmem(struct drm_i915_gem_object *obj); - int i915_gem_object_migrate(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, enum intel_region_id id); -- 2.31.1
[PATCH 2/4] drm/i915/gem: Introduce a selftest for the gem object migrate functionality
From: Matthew Auld A selftest for the gem object migrate functionality. Slightly adapted from the original by Matthew to the new interface and new fill blit code. Co-developed-by: Thomas Hellström Signed-off-by: Thomas Hellström Signed-off-by: Matthew Auld --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 1 + .../drm/i915/gem/selftests/i915_gem_migrate.c | 237 ++ .../drm/i915/selftests/i915_live_selftests.h | 1 + 3 files changed, 239 insertions(+) create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 6421c3a8b2f3..24f4395bf387 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -644,6 +644,7 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/huge_gem_object.c" #include "selftests/huge_pages.c" +#include "selftests/i915_gem_migrate.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c new file mode 100644 index ..a437b66f64d9 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020-2021 Intel Corporation + */ + +#include "gt/intel_migrate.h" + +static int igt_smem_create_migrate(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + int err = 0; + + /* Switch object backing-store on create */ + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) { + err = 
-EINVAL; + continue; + } + + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + if (err) + continue; + + if (i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) + err = -EINVAL; + + i915_gem_object_unpin_pages(obj); + } + i915_gem_object_put(obj); + + return err; +} + +static int igt_lmem_create_migrate(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + int err = 0; + + /* Switch object backing-store on create */ + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) { + err = -EINVAL; + continue; + } + + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + if (err) + continue; + + if (i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) + err = -EINVAL; + + i915_gem_object_unpin_pages(obj); + } + i915_gem_object_put(obj); + + return err; +} + +static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj) +{ + int err; + + err = i915_gem_object_lock(obj, ww); + if (err) + return err; + + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_PRIORITY | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + if (i915_gem_object_is_lmem(obj)) { + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) { + pr_err("object can't migrate to smem.\n"); + return -EINVAL; + } + + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); + if (err) { + pr_err("Object failed migration to smem\n"); + if (err) +
[PATCH 1/4] drm/i915/gem: Implement object migration
Introduce an interface to migrate objects between regions. This is primarily intended to migrate objects to LMEM for display and to SYSTEM for dma-buf, but might be reused in one form or another for performance-based migration. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 91 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 12 +++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 9 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 69 ++ drivers/gpu/drm/i915/gem/i915_gem_wait.c | 19 5 files changed, 183 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 07e8ff9a8aae..6421c3a8b2f3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -513,6 +513,97 @@ bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) return obj->mem_flags & I915_BO_FLAG_IOMEM; } +/** + * i915_gem_object_can_migrate - Whether an object likely can be migrated + * + * @obj: The object to migrate + * @id: The region intended to migrate to + * + * Check whether the object backend supports migration to the + * given region. Note that pinning may affect the ability to migrate. + * + * Return: true if migration is possible, false otherwise. 
+ */ +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, +enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int num_allowed = obj->mm.n_placements; + struct intel_memory_region *mr; + unsigned int i; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + + if (!obj->ops->migrate) + return -EOPNOTSUPP; + + mr = i915->mm.regions[id]; + if (obj->mm.region == mr) + return true; + + if (!i915_gem_object_evictable(obj)) + return false; + + if (!(obj->flags & I915_BO_ALLOC_USER)) + return true; + + if (num_allowed == 0) + return false; + + for (i = 0; i < num_allowed; ++i) { + if (mr == obj->mm.placements[i]) + return true; + } + + return false; +} + +/** + * i915_gem_object_migrate - Migrate an object to the desired region id + * @obj: The object to migrate. + * @ww: An optional struct i915_gem_ww_ctx. If NULL, the backend may + * not be successful in evicting other objects to make room for this object. + * @id: The region id to migrate to. + * + * Attempt to migrate the object to the desired memory region. The + * object backend must support migration and the object may not be + * pinned, (explicitly pinned pages or pinned vmas). The object must + * be locked. + * On successful completion, the object will have pages pointing to + * memory in the new region, but an async migration task may not have + * completed yet, and to accomplish that, i915_gem_object_wait_migration() + * must be called. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance + * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and + * -EBUSY if the object is pinned. 
+ */ +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mr; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + assert_object_held(obj); + + mr = i915->mm.regions[id]; + if (obj->mm.region == mr) + return 0; + + if (!i915_gem_object_evictable(obj)) + return -EBUSY; + + if (!obj->ops->migrate) + return -EOPNOTSUPP; + + return obj->ops->migrate(obj, mr); +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index ea3224a480c4..8cbd7a5334e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -17,6 +17,8 @@ #include "i915_gem_ww.h" #include "i915_vma_types.h" +enum intel_region_id; + /* * XXX: There is a prevalence of the assumption that we fit the * object's page count inside a 32bit _signed_ variable. Let's document @@ -597,6 +599,16 @@ bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); bool i915_gem_object_validates_to_lmem(struct drm_i915_gem_object *obj); +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, +
[PATCH 0/4] drm/i915/gem: Introduce a migrate interface
We want to be able to explicitly migrate objects between gem memory regions, initially for display and dma-buf, but there might be more use-cases coming up. Introduce a gem migrate interface, add a selftest and use it for display fb pinning and dma-buf mapping. This series should make the desktop light up on DG1 with DG1-enabled mesa. Matthew Auld (1): drm/i915/gem: Introduce a selftest for the gem object migrate functionality Thomas Hellström (3): drm/i915/gem: Implement object migration drm/i915/display: Migrate objects to LMEM if possible for display drm/i915/gem: Migrate to system at dma-buf map time drivers/gpu/drm/i915/display/intel_display.c | 5 +- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 9 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 2 +- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 21 -- drivers/gpu/drm/i915/gem/i915_gem_object.c| 92 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 12 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 9 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 69 +++-- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 19 ++ .../drm/i915/gem/selftests/i915_gem_migrate.c | 237 ++ .../drm/i915/selftests/i915_live_selftests.h | 1 + 11 files changed, 434 insertions(+), 42 deletions(-) create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c -- 2.31.1
Re: [PATCH 01/11] drm/sched: Split drm_sched_job_init
On Thu, Jun 24, 2021 at 7:39 PM Christian König wrote: > > > > Am 24.06.21 um 19:37 schrieb Daniel Vetter: > > On Thu, Jun 24, 2021 at 7:30 PM Christian König > > wrote: > >> Am 24.06.21 um 16:00 schrieb Daniel Vetter: > >>> This is a very confusingly named function, because not just does it > >>> init an object, it arms it and provides a point of no return for > >>> pushing a job into the scheduler. It would be nice if that's a bit > >>> clearer in the interface. > >> We originally had that in the push_job interface, but moved that to init > >> for some reason I don't remember. > >> > >>> But the real reason is that I want to push the dependency tracking > >>> helpers into the scheduler code, and that means drm_sched_job_init > >>> must be called a lot earlier, without arming the job. > >> I'm really questioning myself if I like that naming. > >> > >> What about using drm_sched_job_add_dependency instead? > > You're suggesting a > > s/drm_sched_job_init/drm_sched_job_add_dependency/, or just replied to > > the wrong patch? > > Replied to the wrong patch accidentally. I was talking about the "await" > terminology. Can you pls reply there so we don't have too much of a confusion in the discussion? Thanks, Daniel > Christian. > > > -Daniel > > > >> Christian. 
> >> > >>> Signed-off-by: Daniel Vetter > >>> Cc: Lucas Stach > >>> Cc: Russell King > >>> Cc: Christian Gmeiner > >>> Cc: Qiang Yu > >>> Cc: Rob Herring > >>> Cc: Tomeu Vizoso > >>> Cc: Steven Price > >>> Cc: Alyssa Rosenzweig > >>> Cc: David Airlie > >>> Cc: Daniel Vetter > >>> Cc: Sumit Semwal > >>> Cc: "Christian König" > >>> Cc: Masahiro Yamada > >>> Cc: Kees Cook > >>> Cc: Adam Borowski > >>> Cc: Nick Terrell > >>> Cc: Mauro Carvalho Chehab > >>> Cc: Paul Menzel > >>> Cc: Sami Tolvanen > >>> Cc: Viresh Kumar > >>> Cc: Alex Deucher > >>> Cc: Dave Airlie > >>> Cc: Nirmoy Das > >>> Cc: Deepak R Varma > >>> Cc: Lee Jones > >>> Cc: Kevin Wang > >>> Cc: Chen Li > >>> Cc: Luben Tuikov > >>> Cc: "Marek Olšák" > >>> Cc: Dennis Li > >>> Cc: Maarten Lankhorst > >>> Cc: Andrey Grodzovsky > >>> Cc: Sonny Jiang > >>> Cc: Boris Brezillon > >>> Cc: Tian Tao > >>> Cc: Jack Zhang > >>> Cc: etna...@lists.freedesktop.org > >>> Cc: l...@lists.freedesktop.org > >>> Cc: linux-me...@vger.kernel.org > >>> Cc: linaro-mm-...@lists.linaro.org > >>> --- > >>>.gitignore | 1 + > >>>drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ > >>>drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ > >>>drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ > >>>drivers/gpu/drm/lima/lima_sched.c| 2 ++ > >>>drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ > >>>drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- > >>>drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- > >>>drivers/gpu/drm/scheduler/sched_main.c | 23 ++- > >>>include/drm/gpu_scheduler.h | 6 +- > >>>10 files changed, 51 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/.gitignore b/.gitignore > >>> index 7afd412dadd2..52433a930299 100644 > >>> --- a/.gitignore > >>> +++ b/.gitignore > >>> @@ -66,6 +66,7 @@ modules.order > >>>/modules.builtin > >>>/modules.builtin.modinfo > >>>/modules.nsdeps > >>> +*.builtin > >>> > >>># > >>># RPM spec file (make rpm-pkg) > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > >>> 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > >>> index c5386d13eb4a..a4ec092af9a7 100644 > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > >>> @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser > >>> *p, > >>>if (r) > >>>goto error_unlock; > >>> > >>> + drm_sched_job_arm(&job->base); > >>> + > >>>/* No memory allocation is allowed while holding the notifier lock. > >>> * The lock is held until amdgpu_cs_submit is finished and fence is > >>> * added to BOs. > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > >>> index d33e6d97cc89..5ddb955d2315 100644 > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > >>> @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct > >>> drm_sched_entity *entity, > >>>if (r) > >>>return r; > >>> > >>> + drm_sched_job_arm(&job->base); > >>> + > >>>*f = dma_fence_get(&job->base.s_fence->finished); > >>>amdgpu_job_free_resources(job); > >>>drm_sched_entity_push_job(&job->base, entity); > >>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >>> b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >>> index 19826e504efc..af1671f01c7f 100644 > >>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >>> @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(s
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
On Thu, Jun 24, 2021 at 7:56 PM Christian König wrote: > > Am 24.06.21 um 19:43 schrieb Daniel Vetter: > > On Thu, Jun 24, 2021 at 7:38 PM Christian König > > wrote: > >> Am 24.06.21 um 19:29 schrieb Daniel Vetter: > >>> On Thu, Jun 24, 2021 at 07:03:10PM +0200, Christian König wrote: > Am 24.06.21 um 16:00 schrieb Daniel Vetter: > > This is essentially part of drm_sched_dependency_optimized(), which > > only amdgpu seems to make use of. Use it a bit more. > > > > Signed-off-by: Daniel Vetter > > Cc: "Christian König" > > Cc: Daniel Vetter > > Cc: Luben Tuikov > > Cc: Andrey Grodzovsky > > Cc: Alex Deucher > > Cc: Jack Zhang > > --- > > drivers/gpu/drm/scheduler/sched_main.c | 7 +++ > > 1 file changed, 7 insertions(+) > > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c > > b/drivers/gpu/drm/scheduler/sched_main.c > > index 370c336d383f..c31d7cf7df74 100644 > > --- a/drivers/gpu/drm/scheduler/sched_main.c > > +++ b/drivers/gpu/drm/scheduler/sched_main.c > > @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job > > *job, > > if (!fence) > > return 0; > > + /* if it's a fence from us it's guaranteed to be earlier */ > > + if (fence->context == job->entity->fence_context || > > + fence->context == job->entity->fence_context + 1) { > > + dma_fence_put(fence); > > + return 0; > > + } > > + > Well NAK. That would break Vulkan. > > I'm assuming your reply means the NAK is retracted and was just the > > usual "this doesn't perfectly fit for amdgpu" reflex? > > Well rather "NAK, you haven't considered that special handling in amdgpu > and if you really want to unify this you need that as well." > > > > The problem is that Vulkan can insert dependencies between jobs which > run on > the same queue. > > So we need to track those as well and if the previous job for the same > queue/scheduler is not yet finished a pipeline synchronization needs to > be > inserted. > > That's one of the reasons we wasn't able to unify the dependency handling > yet. 
> >>> That sounds like an extremely amdgpu specific constraint? > >> Yeah, that's totally hardware specific. > >> > >> It's just that I don't know how else we could track that without having > >> the same separation as in amdgpu between implicit and explicit fences. > >> And as far as I understand it that's exactly what you want to avoid. > >> > >> As I said this turned out to be really awkward. > >> > >>> You're also the > >>> only one who keeps track of whether the previous job we've scheduled has > >>> finished already (I guess they can get pipelined and you don't flush by > >>> default), so you insert fences. > >> Yes, exactly that. > >> > >>> I guess we can add a await_fence_no_dedup or so for amdgpu, but I'm not > >>> sure why we have to inflict this design constraint on all other drivers? > >>> At least I'm not seeing anything in lima, panfrost, v3d or entaviv that > >>> would break with this, and i915 will also be perfectly fine. > >>> > >>> Also note: I'm not using this for amdgpu, exactly because there's a few > >>> funny things going on. > >> Yeah, exactly the reason why we never unified this. > > Yeah there's clear limits to this, because you also can't use the > > await_implicit helper, because you have to keep filtering for owner or > > the current amdgpu uapi goes horribly slow. I think the benefit would > > be just that we could share the datastructure and the book-keeping, > > but aside from that you'd need your own integration in amdgpu. > > Yeah, but that is trivial. The _add_dependency() function (or however we > want to call it) needs to be exported anyway for adding fences from > syncfile and syncobj. > > Or do you also want to unify the handling for those? I guess we could add some convenience wrapper that pulls in a sync_file or sync_objc automatically. But there's not that much code involved there, and it's also not tricky. Also drivers might need to add dependencies for whatever anyway. 
The await_implicit is a bit different, because that defines how implicit sync is supposed to work. I guess the bikeshed then boils down to which one is the simple await_fence() function. The one that filters for same timeline, or the one that doesn't. I'd make the non-filtering one the special case so that amdgpu sticks out a bit more - out of 6 drivers with schedulers (i915 included) it seems to be the special one. > > One idea I just had was whether we could use the tag bits xarray has > > for the amdgpu purposed. Like we could do a > > drm_sched_job_await_fence_tagged, where you supply additional > > information (like the "this might be relevant for the vm_flush" and > > things like that). Afaiui xarray tags are very fast to enumerate on if > > you're looking for specific tags, but I
[Bug 213561] [bisected][regression] GFX10 AMDGPUs can no longer enter idle state after commit. Commit has been pushed to stable branches too.
https://bugzilla.kernel.org/show_bug.cgi?id=213561 --- Comment #8 from hagar-du...@wanadoo.fr --- Thanks for pointing to a different commit. I don't really have the time currently to revert a specific commit to try it out, pointing out the problem happening between two consecutive kernel versions should be enough TBH for the author to know what this is about. I don't mind filing another bug if you insist, it would be nice to have the dev show up here and state if that's necessary; the problem might not affect the same hwid, but it's basically identical, I wouldn't be surprised if I open a bug the dev decides it's a duplicate. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH] dma-buf/sync_file: Don't leak fences on merge failure
Am 24.06.21 um 19:47 schrieb Jason Ekstrand: Each add_fence() call does a dma_fence_get() on the relevant fence. In the error path, we weren't calling dma_fence_put() so all those fences got leaked. Also, in the krealloc_array failure case, we weren't freeing the fences array. Instead, ensure that i and fences are always zero-initialized and dma_fence_put() all the fences and kfree(fences) on every error path. Signed-off-by: Jason Ekstrand Fixes: a02b9dc90d84 ("dma-buf/sync_file: refactor fence storage in struct sync_file") Cc: Gustavo Padovan Cc: Christian König Reviewed-by: Christian König --- drivers/dma-buf/sync_file.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 20d9bddbb985b..394e6e1e96860 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -211,8 +211,8 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { struct sync_file *sync_file; - struct dma_fence **fences, **nfences, **a_fences, **b_fences; - int i, i_a, i_b, num_fences, a_num_fences, b_num_fences; + struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences; + int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences; sync_file = sync_file_alloc(); if (!sync_file) @@ -236,7 +236,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, * If a sync_file can only be created with sync_file_merge * and sync_file_create, this is a reasonable assumption. 
*/ - for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { + for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { struct dma_fence *pt_a = a_fences[i_a]; struct dma_fence *pt_b = b_fences[i_b]; @@ -277,15 +277,16 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, fences = nfences; } - if (sync_file_set_fence(sync_file, fences, i) < 0) { - kfree(fences); + if (sync_file_set_fence(sync_file, fences, i) < 0) goto err; - } strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; err: + while (i) + dma_fence_put(fences[--i]); + kfree(fences); fput(sync_file->file); return NULL;
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
Am 24.06.21 um 19:43 schrieb Daniel Vetter: On Thu, Jun 24, 2021 at 7:38 PM Christian König wrote: Am 24.06.21 um 19:29 schrieb Daniel Vetter: On Thu, Jun 24, 2021 at 07:03:10PM +0200, Christian König wrote: Am 24.06.21 um 16:00 schrieb Daniel Vetter: This is essentially part of drm_sched_dependency_optimized(), which only amdgpu seems to make use of. Use it a bit more. Signed-off-by: Daniel Vetter Cc: "Christian König" Cc: Daniel Vetter Cc: Luben Tuikov Cc: Andrey Grodzovsky Cc: Alex Deucher Cc: Jack Zhang --- drivers/gpu/drm/scheduler/sched_main.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 370c336d383f..c31d7cf7df74 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job *job, if (!fence) return 0; + /* if it's a fence from us it's guaranteed to be earlier */ + if (fence->context == job->entity->fence_context || + fence->context == job->entity->fence_context + 1) { + dma_fence_put(fence); + return 0; + } + Well NAK. That would break Vulkan. I'm assuming your reply means the NAK is retracted and was just the usual "this doesn't perfectly fit for amdgpu" reflex? Well rather "NAK, you haven't considered that special handling in amdgpu and if you really want to unify this you need that as well." The problem is that Vulkan can insert dependencies between jobs which run on the same queue. So we need to track those as well and if the previous job for the same queue/scheduler is not yet finished a pipeline synchronization needs to be inserted. That's one of the reasons we wasn't able to unify the dependency handling yet. That sounds like an extremely amdgpu specific constraint? Yeah, that's totally hardware specific. It's just that I don't know how else we could track that without having the same separation as in amdgpu between implicit and explicit fences. 
And as far as I understand it that's exactly what you want to avoid. As I said this turned out to be really awkward. You're also the only one who keeps track of whether the previous job we've scheduled has finished already (I guess they can get pipelined and you don't flush by default), so you insert fences. Yes, exactly that. I guess we can add a await_fence_no_dedup or so for amdgpu, but I'm not sure why we have to inflict this design constraint on all other drivers? At least I'm not seeing anything in lima, panfrost, v3d or entaviv that would break with this, and i915 will also be perfectly fine. Also note: I'm not using this for amdgpu, exactly because there's a few funny things going on. Yeah, exactly the reason why we never unified this. Yeah there's clear limits to this, because you also can't use the await_implicit helper, because you have to keep filtering for owner or the current amdgpu uapi goes horribly slow. I think the benefit would be just that we could share the datastructure and the book-keeping, but aside from that you'd need your own integration in amdgpu. Yeah, but that is trivial. The _add_dependency() function (or however we want to call it) needs to be exported anyway for adding fences from syncfile and syncobj. Or do you also want to unify the handling for those? One idea I just had was whether we could use the tag bits xarray has for the amdgpu purposed. Like we could do a drm_sched_job_await_fence_tagged, where you supply additional information (like the "this might be relevant for the vm_flush" and things like that). Afaiui xarray tags are very fast to enumerate on if you're looking for specific tags, but I might be wrong. Ideally this would avoid the need for the duplicated amdgpu_job->sched. That could work. Essentially we just need the information from the scheduler which is the last fence which was dependency optimized. In other words when you push jobs like those to the same scheduler J1 J2 -> depends on J1. 
J3 -> depends on whatever, but not j2 The hardware needs to insert a flush between J2 and J1, but not between j3 and j2. This makes roughly 19% performance difference for some OpenGL games and incorrect rendering for Vulkan if you mess it up either way or the other. Regards, Christian. Cheers, Daniel Regards, Christian. Finally: You _really_ need explicit dependency handling for vulkan in your uapi, instead of the kernel second-guessing what userspace might be doing. That's really not how vulkan is designed to work :-) Cheers, Daniel Christian. /* Deduplicate if we already depend on a fence from the same context. * This lets the size of the array of deps scale with the number of * engines involved, rather than the number of BOs.
[PATCH] dma-buf/sync_file: Don't leak fences on merge failure
Each add_fence() call does a dma_fence_get() on the relevant fence. In the error path, we weren't calling dma_fence_put() so all those fences got leaked. Also, in the krealloc_array failure case, we weren't freeing the fences array. Instead, ensure that i and fences are always zero-initialized and dma_fence_put() all the fences and kfree(fences) on every error path. Signed-off-by: Jason Ekstrand Fixes: a02b9dc90d84 ("dma-buf/sync_file: refactor fence storage in struct sync_file") Cc: Gustavo Padovan Cc: Christian König --- drivers/dma-buf/sync_file.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 20d9bddbb985b..394e6e1e96860 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -211,8 +211,8 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { struct sync_file *sync_file; - struct dma_fence **fences, **nfences, **a_fences, **b_fences; - int i, i_a, i_b, num_fences, a_num_fences, b_num_fences; + struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences; + int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences; sync_file = sync_file_alloc(); if (!sync_file) @@ -236,7 +236,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, * If a sync_file can only be created with sync_file_merge * and sync_file_create, this is a reasonable assumption. 
*/ - for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { + for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { struct dma_fence *pt_a = a_fences[i_a]; struct dma_fence *pt_b = b_fences[i_b]; @@ -277,15 +277,16 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, fences = nfences; } - if (sync_file_set_fence(sync_file, fences, i) < 0) { - kfree(fences); + if (sync_file_set_fence(sync_file, fences, i) < 0) goto err; - } strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; err: + while (i) + dma_fence_put(fences[--i]); + kfree(fences); fput(sync_file->file); return NULL; -- 2.31.1
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
On Thu, Jun 24, 2021 at 7:38 PM Christian König wrote: > > Am 24.06.21 um 19:29 schrieb Daniel Vetter: > > On Thu, Jun 24, 2021 at 07:03:10PM +0200, Christian König wrote: > >> Am 24.06.21 um 16:00 schrieb Daniel Vetter: > >>> This is essentially part of drm_sched_dependency_optimized(), which > >>> only amdgpu seems to make use of. Use it a bit more. > >>> > >>> Signed-off-by: Daniel Vetter > >>> Cc: "Christian König" > >>> Cc: Daniel Vetter > >>> Cc: Luben Tuikov > >>> Cc: Andrey Grodzovsky > >>> Cc: Alex Deucher > >>> Cc: Jack Zhang > >>> --- > >>>drivers/gpu/drm/scheduler/sched_main.c | 7 +++ > >>>1 file changed, 7 insertions(+) > >>> > >>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c > >>> b/drivers/gpu/drm/scheduler/sched_main.c > >>> index 370c336d383f..c31d7cf7df74 100644 > >>> --- a/drivers/gpu/drm/scheduler/sched_main.c > >>> +++ b/drivers/gpu/drm/scheduler/sched_main.c > >>> @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job > >>> *job, > >>> if (!fence) > >>> return 0; > >>> + /* if it's a fence from us it's guaranteed to be earlier */ > >>> + if (fence->context == job->entity->fence_context || > >>> + fence->context == job->entity->fence_context + 1) { > >>> + dma_fence_put(fence); > >>> + return 0; > >>> + } > >>> + > >> Well NAK. That would break Vulkan. I'm assuming your reply means the NAK is retracted and was just the usual "this doesn't perfectly fit for amdgpu" reflex? > >> The problem is that Vulkan can insert dependencies between jobs which run > >> on > >> the same queue. > >> > >> So we need to track those as well and if the previous job for the same > >> queue/scheduler is not yet finished a pipeline synchronization needs to be > >> inserted. > >> > >> That's one of the reasons we wasn't able to unify the dependency handling > >> yet. > > That sounds like an extremely amdgpu specific constraint? > > Yeah, that's totally hardware specific. 
> > It's just that I don't know how else we could track that without having > the same separation as in amdgpu between implicit and explicit fences. > And as far as I understand it that's exactly what you want to avoid. > > As I said this turned out to be really awkward. > > > You're also the > > only one who keeps track of whether the previous job we've scheduled has > > finished already (I guess they can get pipelined and you don't flush by > > default), so you insert fences. > > Yes, exactly that. > > > I guess we can add a await_fence_no_dedup or so for amdgpu, but I'm not > > sure why we have to inflict this design constraint on all other drivers? > > At least I'm not seeing anything in lima, panfrost, v3d or entaviv that > > would break with this, and i915 will also be perfectly fine. > > > > Also note: I'm not using this for amdgpu, exactly because there's a few > > funny things going on. > > Yeah, exactly the reason why we never unified this. Yeah there's clear limits to this, because you also can't use the await_implicit helper, because you have to keep filtering for owner or the current amdgpu uapi goes horribly slow. I think the benefit would be just that we could share the datastructure and the book-keeping, but aside from that you'd need your own integration in amdgpu. One idea I just had was whether we could use the tag bits xarray has for the amdgpu purposed. Like we could do a drm_sched_job_await_fence_tagged, where you supply additional information (like the "this might be relevant for the vm_flush" and things like that). Afaiui xarray tags are very fast to enumerate on if you're looking for specific tags, but I might be wrong. Ideally this would avoid the need for the duplicated amdgpu_job->sched. Cheers, Daniel > Regards, > Christian. > > > Finally: You _really_ need explicit dependency handling for vulkan in your > > uapi, instead of the kernel second-guessing what userspace might be doing. 
> > That's really not how vulkan is designed to work :-) > > > > > Cheers, Daniel > > > > > >> Christian. > >> > >>> /* Deduplicate if we already depend on a fence from the same context. > >>> * This lets the size of the array of deps scale with the number of > >>> * engines involved, rather than the number of BOs. > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Re: [PATCH 01/11] drm/sched: Split drm_sched_job_init
Am 24.06.21 um 19:37 schrieb Daniel Vetter: On Thu, Jun 24, 2021 at 7:30 PM Christian König wrote: Am 24.06.21 um 16:00 schrieb Daniel Vetter: This is a very confusingly named function, because not just does it init an object, it arms it and provides a point of no return for pushing a job into the scheduler. It would be nice if that's a bit clearer in the interface. We originally had that in the push_job interface, but moved that to init for some reason I don't remember. But the real reason is that I want to push the dependency tracking helpers into the scheduler code, and that means drm_sched_job_init must be called a lot earlier, without arming the job. I'm really questioning myself if I like that naming. What about using drm_sched_job_add_dependency instead? You're suggesting a s/drm_sched_job_init/drm_sched_job_add_dependency/, or just replied to the wrong patch? Replied to the wrong patch accidentally. I was talking about the "await" terminology. Christian. -Daniel Christian. Signed-off-by: Daniel Vetter Cc: Lucas Stach Cc: Russell King Cc: Christian Gmeiner Cc: Qiang Yu Cc: Rob Herring Cc: Tomeu Vizoso Cc: Steven Price Cc: Alyssa Rosenzweig Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Masahiro Yamada Cc: Kees Cook Cc: Adam Borowski Cc: Nick Terrell Cc: Mauro Carvalho Chehab Cc: Paul Menzel Cc: Sami Tolvanen Cc: Viresh Kumar Cc: Alex Deucher Cc: Dave Airlie Cc: Nirmoy Das Cc: Deepak R Varma Cc: Lee Jones Cc: Kevin Wang Cc: Chen Li Cc: Luben Tuikov Cc: "Marek Olšák" Cc: Dennis Li Cc: Maarten Lankhorst Cc: Andrey Grodzovsky Cc: Sonny Jiang Cc: Boris Brezillon Cc: Tian Tao Cc: Jack Zhang Cc: etna...@lists.freedesktop.org Cc: l...@lists.freedesktop.org Cc: linux-me...@vger.kernel.org Cc: linaro-mm-...@lists.linaro.org --- .gitignore | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ drivers/gpu/drm/lima/lima_sched.c| 2 ++ 
drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- drivers/gpu/drm/scheduler/sched_main.c | 23 ++- include/drm/gpu_scheduler.h | 6 +- 10 files changed, 51 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 7afd412dadd2..52433a930299 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,7 @@ modules.order /modules.builtin /modules.builtin.modinfo /modules.nsdeps +*.builtin # # RPM spec file (make rpm-pkg) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c5386d13eb4a..a4ec092af9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; + drm_sched_job_arm(&job->base); + /* No memory allocation is allowed while holding the notifier lock. * The lock is held until amdgpu_cs_submit is finished and fence is * added to BOs. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index d33e6d97cc89..5ddb955d2315 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; + drm_sched_job_arm(&job->base); + *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); drm_sched_entity_push_job(&job->base, entity); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 19826e504efc..af1671f01c7f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity, if (ret) goto out_unlock; + drm_sched_job_arm(&submit->sched_job); + submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, submit->out_fence, 0, diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index ecf3267334ff..bd1af1fd8c0f 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -129,6 +129,8 @@ int lima_sched_task_init(struct lima_sched_task *task, return err; } + drm_sched_job_arm(&task->base); + task->num_bos = num_bos; task->vm = lima_vm_get(vm); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
Am 24.06.21 um 19:29 schrieb Daniel Vetter: On Thu, Jun 24, 2021 at 07:03:10PM +0200, Christian König wrote: Am 24.06.21 um 16:00 schrieb Daniel Vetter: This is essentially part of drm_sched_dependency_optimized(), which only amdgpu seems to make use of. Use it a bit more. Signed-off-by: Daniel Vetter Cc: "Christian König" Cc: Daniel Vetter Cc: Luben Tuikov Cc: Andrey Grodzovsky Cc: Alex Deucher Cc: Jack Zhang --- drivers/gpu/drm/scheduler/sched_main.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 370c336d383f..c31d7cf7df74 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job *job, if (!fence) return 0; + /* if it's a fence from us it's guaranteed to be earlier */ + if (fence->context == job->entity->fence_context || + fence->context == job->entity->fence_context + 1) { + dma_fence_put(fence); + return 0; + } + Well NAK. That would break Vulkan. The problem is that Vulkan can insert dependencies between jobs which run on the same queue. So we need to track those as well and if the previous job for the same queue/scheduler is not yet finished a pipeline synchronization needs to be inserted. That's one of the reasons we wasn't able to unify the dependency handling yet. That sounds like an extremely amdgpu specific constraint? Yeah, that's totally hardware specific. It's just that I don't know how else we could track that without having the same separation as in amdgpu between implicit and explicit fences. And as far as I understand it that's exactly what you want to avoid. As I said this turned out to be really awkward. You're also the only one who keeps track of whether the previous job we've scheduled has finished already (I guess they can get pipelined and you don't flush by default), so you insert fences. Yes, exactly that. 
I guess we can add an await_fence_no_dedup or so for amdgpu, but I'm not sure why we have to inflict this design constraint on all other drivers? At least I'm not seeing anything in lima, panfrost, v3d or etnaviv that would break with this, and i915 will also be perfectly fine. Also note: I'm not using this for amdgpu, exactly because there's a few funny things going on. Yeah, exactly the reason why we never unified this. Regards, Christian. Finally: You _really_ need explicit dependency handling for vulkan in your uapi, instead of the kernel second-guessing what userspace might be doing. That's really not how vulkan is designed to work :-) Cheers, Daniel Christian. /* Deduplicate if we already depend on a fence from the same context. * This lets the size of the array of deps scale with the number of * engines involved, rather than the number of BOs.
Re: [PATCH 01/11] drm/sched: Split drm_sched_job_init
On Thu, Jun 24, 2021 at 7:30 PM Christian König wrote: > > Am 24.06.21 um 16:00 schrieb Daniel Vetter: > > This is a very confusingly named function, because not just does it > > init an object, it arms it and provides a point of no return for > > pushing a job into the scheduler. It would be nice if that's a bit > > clearer in the interface. > > We originally had that in the push_job interface, but moved that to init > for some reason I don't remember. > > > But the real reason is that I want to push the dependency tracking > > helpers into the scheduler code, and that means drm_sched_job_init > > must be called a lot earlier, without arming the job. > > I'm really questioning myself if I like that naming. > > What about using drm_sched_job_add_dependency instead? You're suggesting a s/drm_sched_job_init/drm_sched_job_add_dependency/, or just replied to the wrong patch? -Daniel > > Christian. > > > > > Signed-off-by: Daniel Vetter > > Cc: Lucas Stach > > Cc: Russell King > > Cc: Christian Gmeiner > > Cc: Qiang Yu > > Cc: Rob Herring > > Cc: Tomeu Vizoso > > Cc: Steven Price > > Cc: Alyssa Rosenzweig > > Cc: David Airlie > > Cc: Daniel Vetter > > Cc: Sumit Semwal > > Cc: "Christian König" > > Cc: Masahiro Yamada > > Cc: Kees Cook > > Cc: Adam Borowski > > Cc: Nick Terrell > > Cc: Mauro Carvalho Chehab > > Cc: Paul Menzel > > Cc: Sami Tolvanen > > Cc: Viresh Kumar > > Cc: Alex Deucher > > Cc: Dave Airlie > > Cc: Nirmoy Das > > Cc: Deepak R Varma > > Cc: Lee Jones > > Cc: Kevin Wang > > Cc: Chen Li > > Cc: Luben Tuikov > > Cc: "Marek Olšák" > > Cc: Dennis Li > > Cc: Maarten Lankhorst > > Cc: Andrey Grodzovsky > > Cc: Sonny Jiang > > Cc: Boris Brezillon > > Cc: Tian Tao > > Cc: Jack Zhang > > Cc: etna...@lists.freedesktop.org > > Cc: l...@lists.freedesktop.org > > Cc: linux-me...@vger.kernel.org > > Cc: linaro-mm-...@lists.linaro.org > > --- > > .gitignore | 1 + > > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ > > 
drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ > > drivers/gpu/drm/lima/lima_sched.c| 2 ++ > > drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ > > drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- > > drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- > > drivers/gpu/drm/scheduler/sched_main.c | 23 ++- > > include/drm/gpu_scheduler.h | 6 +- > > 10 files changed, 51 insertions(+), 10 deletions(-) > > > > diff --git a/.gitignore b/.gitignore > > index 7afd412dadd2..52433a930299 100644 > > --- a/.gitignore > > +++ b/.gitignore > > @@ -66,6 +66,7 @@ modules.order > > /modules.builtin > > /modules.builtin.modinfo > > /modules.nsdeps > > +*.builtin > > > > # > > # RPM spec file (make rpm-pkg) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > index c5386d13eb4a..a4ec092af9a7 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser > > *p, > > if (r) > > goto error_unlock; > > > > + drm_sched_job_arm(&job->base); > > + > > /* No memory allocation is allowed while holding the notifier lock. > >* The lock is held until amdgpu_cs_submit is finished and fence is > >* added to BOs. 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > index d33e6d97cc89..5ddb955d2315 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct > > drm_sched_entity *entity, > > if (r) > > return r; > > > > + drm_sched_job_arm(&job->base); > > + > > *f = dma_fence_get(&job->base.s_fence->finished); > > amdgpu_job_free_resources(job); > > drm_sched_entity_push_job(&job->base, entity); > > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > index 19826e504efc..af1671f01c7f 100644 > > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity > > *sched_entity, > > if (ret) > > goto out_unlock; > > > > + drm_sched_job_arm(&submit->sched_job); > > + > > submit->out_fence = > > dma_fence_get(&submit->sched_job.s_fence->finished); > > submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, > > submit->out_fence, 0, > > diff --git a/drivers/gpu/drm/lima/lima_sched.c > > b/drivers/gpu/drm/lima/lima_sched.c > > index ecf3267334ff..bd1af1fd8c0f 100644 > > --- a/drivers/gpu/drm/lima/lima_sched.c > > +++ b/drivers/gpu/drm/lima/
Re: [Intel-gfx] [PATCH 05/47] drm/i915/guc: Add stall timer to non blocking CTB send function
On 24.06.2021 09:04, Matthew Brost wrote: > Implement a stall timer which fails H2G CTBs once a period of time > with no forward progress is reached to prevent deadlock. > > Also update to ct_write to return -EIO rather than -EPIPE on a > corrupted descriptor. by doing so you will have the same error code for two different problems: a) corrupted CTB descriptor (definitely unrecoverable) b) long stall in CTB processing (still recoverable) while caller is explicitly instructed to retry only on: c) temporary stall in CTB processing (likely recoverable) so why do we want to limit our diagnostics? > > Signed-off-by: John Harrison > Signed-off-by: Daniele Ceraolo Spurio > Signed-off-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 47 +-- > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 ++ > 2 files changed, 48 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > index c9a65d05911f..27ec30b5ef47 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > @@ -319,6 +319,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) > goto err_deregister; > > ct->enabled = true; > + ct->stall_time = KTIME_MAX; > > return 0; > > @@ -392,7 +393,7 @@ static int ct_write(struct intel_guc_ct *ct, > unsigned int i; > > if (unlikely(ctb->broken)) > - return -EPIPE; > + return -EIO; > > if (unlikely(desc->status)) > goto corrupted; > @@ -464,7 +465,7 @@ static int ct_write(struct intel_guc_ct *ct, > CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", >desc->head, desc->tail, desc->status); > ctb->broken = true; > - return -EPIPE; > + return -EIO; > } > > /** > @@ -507,6 +508,18 @@ static int wait_for_ct_request_update(struct ct_request > *req, u32 *status) > return err; > } > > +#define GUC_CTB_TIMEOUT_MS 1500 it's 150% of core CTB timeout, maybe we should correlate them ? 
> +static inline bool ct_deadlocked(struct intel_guc_ct *ct) > +{ > + long timeout = GUC_CTB_TIMEOUT_MS; > + bool ret = ktime_ms_delta(ktime_get(), ct->stall_time) > timeout; > + > + if (unlikely(ret)) > + CT_ERROR(ct, "CT deadlocked\n"); nit: in commit message you said all these changes are to "prevent deadlock" so maybe this message should rather be: int delta = ktime_ms_delta(ktime_get(), ct->stall_time); CT_ERROR(ct, "Communication stalled for %dms\n", delta); (note that CT_ERROR already adds "CT" prefix) > + > + return ret; > +} > + > static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw) > { > struct guc_ct_buffer_desc *desc = ctb->desc; > @@ -518,6 +531,26 @@ static inline bool h2g_has_room(struct > intel_guc_ct_buffer *ctb, u32 len_dw) > return space >= len_dw; > } > > +static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw) > +{ > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > + > + lockdep_assert_held(&ct->ctbs.send.lock); > + > + if (unlikely(!h2g_has_room(ctb, len_dw))) { > + if (ct->stall_time == KTIME_MAX) > + ct->stall_time = ktime_get(); > + > + if (unlikely(ct_deadlocked(ct))) and maybe above message should be printed somewhere around here when we detect "deadlock" for the first time? 
> + return -EIO; > + else > + return -EBUSY; > + } > + > + ct->stall_time = KTIME_MAX; > + return 0; > +} > + > static int ct_send_nb(struct intel_guc_ct *ct, > const u32 *action, > u32 len, > @@ -530,7 +563,7 @@ static int ct_send_nb(struct intel_guc_ct *ct, > > spin_lock_irqsave(&ctb->lock, spin_flags); > > - ret = h2g_has_room(ctb, len + 1); > + ret = has_room_nb(ct, len + 1); > if (unlikely(ret)) > goto out; > > @@ -574,11 +607,19 @@ static int ct_send(struct intel_guc_ct *ct, > retry: > spin_lock_irqsave(&ct->ctbs.send.lock, flags); > if (unlikely(!h2g_has_room(ctb, len + 1))) { > + if (ct->stall_time == KTIME_MAX) > + ct->stall_time = ktime_get(); as this is a repeated pattern, maybe it should be moved to h2g_has_room or other wrapper ? > spin_unlock_irqrestore(&ct->ctbs.send.lock, flags); > + > + if (unlikely(ct_deadlocked(ct))) > + return -EIO; > + > cond_resched(); > goto retry; > } > > + ct->stall_time = KTIME_MAX; this one too > + > fence = ct_get_next_fence(ct); > request.fence = fence; > request.status = 0; > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > index eb69263324ba..55ef7c52472f 100644 > ---
Re: [PATCH 01/11] drm/sched: Split drm_sched_job_init
Am 24.06.21 um 16:00 schrieb Daniel Vetter: This is a very confusingly named function, because not just does it init an object, it arms it and provides a point of no return for pushing a job into the scheduler. It would be nice if that's a bit clearer in the interface. We originally had that in the push_job interface, but moved that to init for some reason I don't remember. But the real reason is that I want to push the dependency tracking helpers into the scheduler code, and that means drm_sched_job_init must be called a lot earlier, without arming the job. I'm really questioning myself if I like that naming. What about using drm_sched_job_add_dependency instead? Christian. Signed-off-by: Daniel Vetter Cc: Lucas Stach Cc: Russell King Cc: Christian Gmeiner Cc: Qiang Yu Cc: Rob Herring Cc: Tomeu Vizoso Cc: Steven Price Cc: Alyssa Rosenzweig Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Masahiro Yamada Cc: Kees Cook Cc: Adam Borowski Cc: Nick Terrell Cc: Mauro Carvalho Chehab Cc: Paul Menzel Cc: Sami Tolvanen Cc: Viresh Kumar Cc: Alex Deucher Cc: Dave Airlie Cc: Nirmoy Das Cc: Deepak R Varma Cc: Lee Jones Cc: Kevin Wang Cc: Chen Li Cc: Luben Tuikov Cc: "Marek Olšák" Cc: Dennis Li Cc: Maarten Lankhorst Cc: Andrey Grodzovsky Cc: Sonny Jiang Cc: Boris Brezillon Cc: Tian Tao Cc: Jack Zhang Cc: etna...@lists.freedesktop.org Cc: l...@lists.freedesktop.org Cc: linux-me...@vger.kernel.org Cc: linaro-mm-...@lists.linaro.org --- .gitignore | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ drivers/gpu/drm/lima/lima_sched.c| 2 ++ drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- drivers/gpu/drm/scheduler/sched_main.c | 23 ++- include/drm/gpu_scheduler.h | 6 +- 10 files changed, 51 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 
7afd412dadd2..52433a930299 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,7 @@ modules.order /modules.builtin /modules.builtin.modinfo /modules.nsdeps +*.builtin # # RPM spec file (make rpm-pkg) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c5386d13eb4a..a4ec092af9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; + drm_sched_job_arm(&job->base); + /* No memory allocation is allowed while holding the notifier lock. * The lock is held until amdgpu_cs_submit is finished and fence is * added to BOs. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index d33e6d97cc89..5ddb955d2315 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; + drm_sched_job_arm(&job->base); + *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); drm_sched_entity_push_job(&job->base, entity); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 19826e504efc..af1671f01c7f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity, if (ret) goto out_unlock; + drm_sched_job_arm(&submit->sched_job); + submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, submit->out_fence, 0, diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index ecf3267334ff..bd1af1fd8c0f 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -129,6 +129,8 @@ int 
lima_sched_task_init(struct lima_sched_task *task, return err; } + drm_sched_job_arm(&task->base); + task->num_bos = num_bos; task->vm = lima_vm_get(vm); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index beb62c8fc851..1e950534b9b0 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -244,6 +244,8 @@ int panfrost_job_push(struct panfrost_job *job) goto unlock; } + drm_sched_job_arm(&job->base); + job->render_done_fence = dma_fen
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
On Thu, Jun 24, 2021 at 07:03:10PM +0200, Christian König wrote: > Am 24.06.21 um 16:00 schrieb Daniel Vetter: > > This is essentially part of drm_sched_dependency_optimized(), which > > only amdgpu seems to make use of. Use it a bit more. > > > > Signed-off-by: Daniel Vetter > > Cc: "Christian König" > > Cc: Daniel Vetter > > Cc: Luben Tuikov > > Cc: Andrey Grodzovsky > > Cc: Alex Deucher > > Cc: Jack Zhang > > --- > > drivers/gpu/drm/scheduler/sched_main.c | 7 +++ > > 1 file changed, 7 insertions(+) > > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c > > b/drivers/gpu/drm/scheduler/sched_main.c > > index 370c336d383f..c31d7cf7df74 100644 > > --- a/drivers/gpu/drm/scheduler/sched_main.c > > +++ b/drivers/gpu/drm/scheduler/sched_main.c > > @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job > > *job, > > if (!fence) > > return 0; > > + /* if it's a fence from us it's guaranteed to be earlier */ > > + if (fence->context == job->entity->fence_context || > > + fence->context == job->entity->fence_context + 1) { > > + dma_fence_put(fence); > > + return 0; > > + } > > + > > Well NAK. That would break Vulkan. > > The problem is that Vulkan can insert dependencies between jobs which run on > the same queue. > > So we need to track those as well and if the previous job for the same > queue/scheduler is not yet finished a pipeline synchronization needs to be > inserted. > > That's one of the reasons we wasn't able to unify the dependency handling > yet. That sounds like an extremely amdgpu specific constraint? You're also the only one who keeps track of whether the previous job we've scheduled has finished already (I guess they can get pipelined and you don't flush by default), so you insert fences. I guess we can add a await_fence_no_dedup or so for amdgpu, but I'm not sure why we have to inflict this design constraint on all other drivers? 
At least I'm not seeing anything in lima, panfrost, v3d or etnaviv that would break with this, and i915 will also be perfectly fine. Also note: I'm not using this for amdgpu, exactly because there's a few funny things going on. Finally: You _really_ need explicit dependency handling for vulkan in your uapi, instead of the kernel second-guessing what userspace might be doing. That's really not how vulkan is designed to work :-) Cheers, Daniel > Christian. > > > /* Deduplicate if we already depend on a fence from the same context. > > * This lets the size of the array of deps scale with the number of > > * engines involved, rather than the number of BOs. > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Re: [Intel-gfx] [RFC PATCH 36/97] drm/i915/guc: Add non blocking CTB send function
On Thu, Jun 24, 2021 at 09:38:33AM -0700, Matthew Brost wrote: > On Thu, Jun 10, 2021 at 05:27:48PM +0200, Daniel Vetter wrote: > > On Wed, Jun 09, 2021 at 04:10:23PM -0700, Matthew Brost wrote: > > > On Tue, Jun 08, 2021 at 10:46:15AM +0200, Daniel Vetter wrote: > > > > On Tue, Jun 8, 2021 at 10:39 AM Tvrtko Ursulin > > > > wrote: > > > > > > > > > > > > > > > On 07/06/2021 18:31, Matthew Brost wrote: > > > > > > On Thu, May 27, 2021 at 04:11:50PM +0100, Tvrtko Ursulin wrote: > > > > > >> > > > > > >> On 27/05/2021 15:35, Matthew Brost wrote: > > > > > >>> On Thu, May 27, 2021 at 11:02:24AM +0100, Tvrtko Ursulin wrote: > > > > > > > > > > On 26/05/2021 19:10, Matthew Brost wrote: > > > > > > > > > > [snip] > > > > > > > > > > > +static int ct_send_nb(struct intel_guc_ct *ct, > > > > > > + const u32 *action, > > > > > > + u32 len, > > > > > > + u32 flags) > > > > > > +{ > > > > > > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > > > > > + unsigned long spin_flags; > > > > > > + u32 fence; > > > > > > + int ret; > > > > > > + > > > > > > + spin_lock_irqsave(&ctb->lock, spin_flags); > > > > > > + > > > > > > + ret = ctb_has_room(ctb, len + 1); > > > > > > + if (unlikely(ret)) > > > > > > + goto out; > > > > > > + > > > > > > + fence = ct_get_next_fence(ct); > > > > > > + ret = ct_write(ct, action, len, fence, flags); > > > > > > + if (unlikely(ret)) > > > > > > + goto out; > > > > > > + > > > > > > + intel_guc_notify(ct_to_guc(ct)); > > > > > > + > > > > > > +out: > > > > > > + spin_unlock_irqrestore(&ctb->lock, spin_flags); > > > > > > + > > > > > > + return ret; > > > > > > +} > > > > > > + > > > > > > static int ct_send(struct intel_guc_ct *ct, > > > > > > const u32 *action, > > > > > > u32 len, > > > > > > @@ -473,6 +541,7 @@ static int ct_send(struct intel_guc_ct > > > > > > *ct, > > > > > > u32 response_buf_size, > > > > > > u32 *status) > > > > > > { > > > > > > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > > > > > struct ct_request request; 
> > > > > > unsigned long flags; > > > > > > u32 fence; > > > > > > @@ -482,8 +551,20 @@ static int ct_send(struct intel_guc_ct > > > > > > *ct, > > > > > > GEM_BUG_ON(!len); > > > > > > GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); > > > > > > GEM_BUG_ON(!response_buf && > > > > > > response_buf_size); > > > > > > + might_sleep(); > > > > > > > > > > Sleep is just cond_resched below or there is more? > > > > > > > > > > >>> > > > > > >>> Yes, the cond_resched. > > > > > >>> > > > > > > + /* > > > > > > + * We use a lazy spin wait loop here as we believe > > > > > > that if the CT > > > > > > + * buffers are sized correctly the flow control > > > > > > condition should be > > > > > > + * rare. > > > > > > + */ > > > > > > +retry: > > > > > > spin_lock_irqsave(&ct->ctbs.send.lock, flags); > > > > > > + if (unlikely(!ctb_has_room(ctb, len + 1))) { > > > > > > + spin_unlock_irqrestore(&ct->ctbs.send.lock, > > > > > > flags); > > > > > > + cond_resched(); > > > > > > + goto retry; > > > > > > + } > > > > > > > > > > If this patch is about adding a non-blocking send function, > > > > > and below we can > > > > > see that it creates a fork: > > > > > > > > > > intel_guc_ct_send: > > > > > ... > > > > > if (flags & INTEL_GUC_SEND_NB) > > > > > return ct_send_nb(ct, action, len, flags); > > > > > > > > > > ret = ct_send(ct, action, len, response_buf, > > > > > response_buf_size, &status); > > > > > > > > > > Then why is there a change in ct_send here, which is not the > > > > > new > > > > > non-blocking path? > > > > > > > > > > >>> > > > > > >>> There is not a change to ct_send(), just to intel_guc_ct_send. > > > > > >> > > > > > >> I was doing by the diff which says: > > > > > >> > > > > > >> static int ct_send(struct intel_guc_ct *ct, > > > > > >> cons
Re: [PATCH 06/11] drm/v3d: Move drm_sched_job_init to v3d_job_init
On Thu, Jun 24, 2021 at 09:59:08AM -0700, Emma Anholt wrote: > On Thu, Jun 24, 2021 at 7:00 AM Daniel Vetter wrote: > > > > Prep work for using the scheduler dependency handling. I'll add "We need to call drm_sched_job_init earlier so we can use the new drm_sched_job_await* functions for dependency handling here." That gives a bit more context on what's going on here. > > > > Signed-off-by: Daniel Vetter > > Cc: Emma Anholt > > Back when I wrote this, I think there were rules that there had to be > no failure paths between a job_init and a push. Has that changed? > > I really don't have the context to evaluate this, I'm not sure what > new "scheduler dependency handling" is given that there was already > something that I considered to be dependency handling! Full patch series link: https://lore.kernel.org/dri-devel/20210624140025.438303-1-daniel.vet...@ffwll.ch/T/#t The job_init vs push_job is addressed here: https://lore.kernel.org/dri-devel/20210624140025.438303-1-daniel.vet...@ffwll.ch/T/#mb6c4d75e1c57a5056d7b2ec8fbb9839fc5be41a7 I split job_init into job_init (which can fail, and be done earlier) and job_arm (which can't fail, and must be atomic with the push_job). The entire goal of this is to lift the dependency handling from "everyone copypastes v3d" to "drm_sched has it for you already", which is this patch here: https://lore.kernel.org/dri-devel/20210624140025.438303-1-daniel.vet...@ffwll.ch/T/#m9f64aaf840cbf8815cd2ea9a68f99a51da9baa5f Cheers, Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Re: [PATCH 01/47] drm/i915/guc: Relax CTB response timeout
On 24.06.2021 09:04, Matthew Brost wrote: > In upcoming patch we will allow more CTB requests to be sent in > parallel to the GuC for processing, so we shouldn't assume any more > that GuC will always reply without 10ms. > > Use bigger value hardcoded value of 1s instead. > > v2: Add CONFIG_DRM_I915_GUC_CTB_TIMEOUT config option > v3: > (Daniel Vetter) > - Use hardcoded value of 1s rather than config option > > Signed-off-by: Matthew Brost > Cc: Michal Wajdeczko > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > index 43409044528e..a59e239497ee 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > @@ -474,14 +474,16 @@ static int wait_for_ct_request_update(struct ct_request > *req, u32 *status) > /* >* Fast commands should complete in less than 10us, so sample quickly >* up to that length of time, then switch to a slower sleep-wait loop. > - * No GuC command should ever take longer than 10ms. > + * No GuC command should ever take longer than 10ms but many GuC > + * commands can be inflight at time, so use a 1s timeout on the slower > + * sleep-wait loop. >*/ > #define done \ > (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \ >GUC_HXG_ORIGIN_GUC) > err = wait_for_us(done, 10); > if (err) > - err = wait_for(done, 10); > + err = wait_for(done, 1000); can we add #defines for these 10/1000 values? with that Reviewed-by: Michal Wajdeczko > #undef done > > if (unlikely(err)) >
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
Am 24.06.21 um 16:00 schrieb Daniel Vetter: This is essentially part of drm_sched_dependency_optimized(), which only amdgpu seems to make use of. Use it a bit more. Signed-off-by: Daniel Vetter Cc: "Christian König" Cc: Daniel Vetter Cc: Luben Tuikov Cc: Andrey Grodzovsky Cc: Alex Deucher Cc: Jack Zhang --- drivers/gpu/drm/scheduler/sched_main.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 370c336d383f..c31d7cf7df74 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job *job, if (!fence) return 0; + /* if it's a fence from us it's guaranteed to be earlier */ + if (fence->context == job->entity->fence_context || + fence->context == job->entity->fence_context + 1) { + dma_fence_put(fence); + return 0; + } + Well NAK. That would break Vulkan. The problem is that Vulkan can insert dependencies between jobs which run on the same queue. So we need to track those as well and if the previous job for the same queue/scheduler is not yet finished a pipeline synchronization needs to be inserted. That's one of the reasons we wasn't able to unify the dependency handling yet. Christian. /* Deduplicate if we already depend on a fence from the same context. * This lets the size of the array of deps scale with the number of * engines involved, rather than the number of BOs.
Re: [Intel-gfx] [PATCH 04/47] drm/i915/guc: Add non blocking CTB send function
On 24.06.2021 17:49, Matthew Brost wrote: > On Thu, Jun 24, 2021 at 04:48:32PM +0200, Michal Wajdeczko wrote: >> >> >> On 24.06.2021 09:04, Matthew Brost wrote: >>> Add non blocking CTB send function, intel_guc_send_nb. GuC submission >>> will send CTBs in the critical path and does not need to wait for these >>> CTBs to complete before moving on, hence the need for this new function. >>> >>> The non-blocking CTB now must have a flow control mechanism to ensure >>> the buffer isn't overrun. A lazy spin wait is used as we believe the >>> flow control condition should be rare with a properly sized buffer. >>> >>> The function, intel_guc_send_nb, is exported in this patch but unused. >>> Several patches later in the series make use of this function. >>> >>> Signed-off-by: John Harrison >>> Signed-off-by: Matthew Brost >>> --- >>> drivers/gpu/drm/i915/gt/uc/intel_guc.h| 12 +++- >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 77 +-- >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 3 +- >>> 3 files changed, 82 insertions(+), 10 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> index 4abc59f6f3cd..24b1df6ad4ae 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> @@ -74,7 +74,15 @@ static inline struct intel_guc *log_to_guc(struct >>> intel_guc_log *log) >>> static >>> inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 >>> len) >>> { >>> - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); >>> +} >>> + >>> +#define INTEL_GUC_SEND_NB BIT(31) >> >> hmm, this flag really belongs to intel_guc_ct_send() so it should be >> defined as CTB flag near that function declaration >> > > I can move this up a few lines. 
> >>> +static >>> +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 >>> len) >>> +{ >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, >>> +INTEL_GUC_SEND_NB); >>> } >>> >>> static inline int >>> @@ -82,7 +90,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const >>> u32 *action, u32 len, >>>u32 *response_buf, u32 response_buf_size) >>> { >>> return intel_guc_ct_send(&guc->ct, action, len, >>> -response_buf, response_buf_size); >>> +response_buf, response_buf_size, 0); >>> } >>> >>> static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> index a17215920e58..c9a65d05911f 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> @@ -3,6 +3,11 @@ >>> * Copyright © 2016-2019 Intel Corporation >>> */ >>> >>> +#include >>> +#include >>> +#include >>> +#include >>> + >>> #include "i915_drv.h" >>> #include "intel_guc_ct.h" >>> #include "gt/intel_gt.h" >>> @@ -373,7 +378,7 @@ static void write_barrier(struct intel_guc_ct *ct) >>> static int ct_write(struct intel_guc_ct *ct, >>> const u32 *action, >>> u32 len /* in dwords */, >>> - u32 fence) >>> + u32 fence, u32 flags) >>> { >>> struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; >>> struct guc_ct_buffer_desc *desc = ctb->desc; >>> @@ -421,9 +426,13 @@ static int ct_write(struct intel_guc_ct *ct, >>> FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | >>> FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); >>> >>> - hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | >>> - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | >>> -GUC_HXG_REQUEST_MSG_0_DATA0, action[0]); >>> + hxg = (flags & INTEL_GUC_SEND_NB) ? 
>>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | >>> +FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | >>> + GUC_HXG_EVENT_MSG_0_DATA0, action[0])) : >>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | >>> +FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | >>> + GUC_HXG_REQUEST_MSG_0_DATA0, action[0])); >> >> or as we already switched to accept and return whole HXG messages in >> guc_send_mmio() maybe we should do the same for CTB variant too and >> instead of using extra flag just let caller to prepare proper HXG header >> with HXG_EVENT type and then in CTB code just look at this type to make >> decision which code path to use >> > > Not sure I follow. Anyways could this be done in a follow up by you if > want this change. > >> note that existing callers should not be impacted, as full HXG header >> for the REQUEST message looks exactly the same as "action" code alone. >> >>> >>> CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\
Re: [PATCH 02/11] drm/sched: Add dependency tracking
Am 24.06.21 um 16:39 schrieb Lucas Stach: Am Donnerstag, dem 24.06.2021 um 16:00 +0200 schrieb Daniel Vetter: Instead of just a callback we can just glue in the gem helpers that panfrost, v3d and lima currently use. There's really not that many ways to skin this cat. On the naming bikeshed: The idea for using _await_ to denote adding dependencies to a job comes from i915, where that's used quite extensively all over the place, in lots of datastructures. Signed-off-by: Daniel Vetter Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Andrey Grodzovsky Cc: Lee Jones Cc: Nirmoy Das Cc: Boris Brezillon Cc: Luben Tuikov Cc: Alex Deucher Cc: Jack Zhang Cc: linux-me...@vger.kernel.org Cc: linaro-mm-...@lists.linaro.org --- drivers/gpu/drm/scheduler/sched_entity.c | 18 +++- drivers/gpu/drm/scheduler/sched_main.c | 103 +++ include/drm/gpu_scheduler.h | 31 ++- 3 files changed, 146 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index f7347c284886..b6f72fafd504 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -211,6 +211,19 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, job->sched->ops->free_job(job); } +static struct dma_fence * +drm_sched_job_dependency(struct drm_sched_job *job, +struct drm_sched_entity *entity) +{ + if (!xa_empty(&job->dependencies)) + return xa_erase(&job->dependencies, job->last_dependency++); Not sure how much it buys us now that you dedup fences before adding them to the xa, but we could avoid potentially avoid some ping-pong looping in the scheduler by checking if the fence we are about to return here is already signaled and skipping to the next one if so. You absolutely need this, especially for TTM based drivers since you basically need to add all the fences from all the BOs in you relocation list. 
When I initially implemented the dependency handling I've tried multiple approaches, including something similar to that one here. Not sure how well the performance will be, but I think we can revert to something more complicated rather easily when we find that it doesn't work as expected. One unresolved problem is that we need to track the last fence we optimized by looking at the scheduler instance. This is necessary since Vulkan dependencies don't work correctly otherwise. Amdgpu currently has a rather awkward workaround for that. But in general it looks like the right thing to do. Regards, Christian. Regards, Lucas + + if (job->sched->ops->dependency) + return job->sched->ops->dependency(job, entity); + + return NULL; +} + /** * drm_sched_entity_kill_jobs - Make sure all remaining jobs are killed * @@ -229,7 +242,7 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity) struct drm_sched_fence *s_fence = job->s_fence; /* Wait for all dependencies to avoid data corruptions */ - while ((f = job->sched->ops->dependency(job, entity))) + while ((f = drm_sched_job_dependency(job, entity))) dma_fence_wait(f, false); drm_sched_fence_scheduled(s_fence); @@ -419,7 +432,6 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) */ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) { - struct drm_gpu_scheduler *sched = entity->rq->sched; struct drm_sched_job *sched_job; sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); @@ -427,7 +439,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) return NULL; while ((entity->dependency = - sched->ops->dependency(sched_job, entity))) { + drm_sched_job_dependency(sched_job, entity))) { trace_drm_sched_job_wait_dep(sched_job, entity->dependency); if (drm_sched_entity_add_dependency_cb(entity)) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 
70eefed17e06..370c336d383f 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -603,6 +603,8 @@ int drm_sched_job_init(struct drm_sched_job *job, INIT_LIST_HEAD(&job->list); + xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC); + return 0; } EXPORT_SYMBOL(drm_sched_job_init); @@ -626,6 +628,98 @@ void drm_sched_job_arm(struct drm_sched_job *job) } EXPORT_SYMBOL(drm_sched_job_arm); +/** + * drm_sched_job_await_fence - adds the fence as a job dependency + * @job: scheduler job to add the dependencies to + * @fence: the dma_fence to add to the list of dependencies. + * + * Note that @fence is consumed in both the suc
Re: [Intel-gfx] [RFC PATCH 36/97] drm/i915/guc: Add non blocking CTB send function
On Thu, Jun 10, 2021 at 05:27:48PM +0200, Daniel Vetter wrote: > On Wed, Jun 09, 2021 at 04:10:23PM -0700, Matthew Brost wrote: > > On Tue, Jun 08, 2021 at 10:46:15AM +0200, Daniel Vetter wrote: > > > On Tue, Jun 8, 2021 at 10:39 AM Tvrtko Ursulin > > > wrote: > > > > > > > > > > > > On 07/06/2021 18:31, Matthew Brost wrote: > > > > > On Thu, May 27, 2021 at 04:11:50PM +0100, Tvrtko Ursulin wrote: > > > > >> > > > > >> On 27/05/2021 15:35, Matthew Brost wrote: > > > > >>> On Thu, May 27, 2021 at 11:02:24AM +0100, Tvrtko Ursulin wrote: > > > > > > > > On 26/05/2021 19:10, Matthew Brost wrote: > > > > > > > > [snip] > > > > > > > > > +static int ct_send_nb(struct intel_guc_ct *ct, > > > > > + const u32 *action, > > > > > + u32 len, > > > > > + u32 flags) > > > > > +{ > > > > > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > > > > + unsigned long spin_flags; > > > > > + u32 fence; > > > > > + int ret; > > > > > + > > > > > + spin_lock_irqsave(&ctb->lock, spin_flags); > > > > > + > > > > > + ret = ctb_has_room(ctb, len + 1); > > > > > + if (unlikely(ret)) > > > > > + goto out; > > > > > + > > > > > + fence = ct_get_next_fence(ct); > > > > > + ret = ct_write(ct, action, len, fence, flags); > > > > > + if (unlikely(ret)) > > > > > + goto out; > > > > > + > > > > > + intel_guc_notify(ct_to_guc(ct)); > > > > > + > > > > > +out: > > > > > + spin_unlock_irqrestore(&ctb->lock, spin_flags); > > > > > + > > > > > + return ret; > > > > > +} > > > > > + > > > > > static int ct_send(struct intel_guc_ct *ct, > > > > > const u32 *action, > > > > > u32 len, > > > > > @@ -473,6 +541,7 @@ static int ct_send(struct intel_guc_ct > > > > > *ct, > > > > > u32 response_buf_size, > > > > > u32 *status) > > > > > { > > > > > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > > > > struct ct_request request; > > > > > unsigned long flags; > > > > > u32 fence; > > > > > @@ -482,8 +551,20 @@ static int ct_send(struct intel_guc_ct > > > > > *ct, > > > > > GEM_BUG_ON(!len); > > > > > 
GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); > > > > > GEM_BUG_ON(!response_buf && response_buf_size); > > > > > + might_sleep(); > > > > > > > > Sleep is just cond_resched below or there is more? > > > > > > > > >>> > > > > >>> Yes, the cond_resched. > > > > >>> > > > > > + /* > > > > > + * We use a lazy spin wait loop here as we believe that > > > > > if the CT > > > > > + * buffers are sized correctly the flow control > > > > > condition should be > > > > > + * rare. > > > > > + */ > > > > > +retry: > > > > > spin_lock_irqsave(&ct->ctbs.send.lock, flags); > > > > > + if (unlikely(!ctb_has_room(ctb, len + 1))) { > > > > > + spin_unlock_irqrestore(&ct->ctbs.send.lock, > > > > > flags); > > > > > + cond_resched(); > > > > > + goto retry; > > > > > + } > > > > > > > > If this patch is about adding a non-blocking send function, > > > > and below we can > > > > see that it creates a fork: > > > > > > > > intel_guc_ct_send: > > > > ... > > > > if (flags & INTEL_GUC_SEND_NB) > > > > return ct_send_nb(ct, action, len, flags); > > > > > > > > ret = ct_send(ct, action, len, response_buf, > > > > response_buf_size, &status); > > > > > > > > Then why is there a change in ct_send here, which is not the > > > > new > > > > non-blocking path? > > > > > > > > >>> > > > > >>> There is not a change to ct_send(), just to intel_guc_ct_send. > > > > >> > > > > >> I was doing by the diff which says: > > > > >> > > > > >> static int ct_send(struct intel_guc_ct *ct, > > > > >> const u32 *action, > > > > >> u32 len, > > > > >> @@ -473,6 +541,7 @@ static int ct_send(struct intel_guc_ct *ct, > > > > >> u32 response_buf_size, > > > > >> u32 *status) > > > > >> { > > > > >> +struct intel_
Re: [PATCH 45/47] drm/i915/guc: Include scheduling policies in the debugfs state dump
On Thu, Jun 24, 2021 at 12:05:14AM -0700, Matthew Brost wrote: > From: John Harrison > > Added the scheduling policy parameters to the 'guc_info' debugfs state > dump. > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 13 + > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h | 2 ++ > drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c | 2 ++ > 3 files changed, 17 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > index c6d0b762d82c..b8182844aa00 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > @@ -92,6 +92,19 @@ static void guc_policies_init(struct intel_guc *guc, > struct guc_policies *polici > policies->is_valid = 1; > } > > +void intel_guc_log_policy_info(struct intel_guc *guc, struct drm_printer *dp) > +{ > + struct __guc_ads_blob *blob = guc->ads_blob; > + > + if (unlikely(!blob)) > + return; > + > + drm_printf(dp, "Global scheduling policies:\n"); > + drm_printf(dp, " DPC promote time = %u\n", > blob->policies.dpc_promote_time); > + drm_printf(dp, " Max num work items = %u\n", > blob->policies.max_num_work_items); > + drm_printf(dp, " Flags = %u\n", > blob->policies.global_flags); > +} > + > static int guc_action_policies_update(struct intel_guc *guc, u32 > policy_offset) > { > u32 action[] = { > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h > index b00d3ae1113a..0fdcb3583601 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h > @@ -7,9 +7,11 @@ > #define _INTEL_GUC_ADS_H_ > > struct intel_guc; > +struct drm_printer; > > int intel_guc_ads_create(struct intel_guc *guc); > void intel_guc_ads_destroy(struct intel_guc *guc); > void intel_guc_ads_reset(struct intel_guc *guc); > +void intel_guc_log_policy_info(struct intel_guc *guc, struct 
drm_printer *p); > > #endif > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c > index 62b9ce0fafaa..9a03ff56e654 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c > @@ -10,6 +10,7 @@ > #include "intel_guc_debugfs.h" > #include "intel_guc_log_debugfs.h" > #include "gt/uc/intel_guc_ct.h" > +#include "gt/uc/intel_guc_ads.h" > #include "gt/uc/intel_guc_submission.h" > > static int guc_info_show(struct seq_file *m, void *data) > @@ -29,6 +30,7 @@ static int guc_info_show(struct seq_file *m, void *data) > > intel_guc_log_ct_info(&guc->ct, &p); > intel_guc_log_submission_info(guc, &p); > + intel_guc_log_policy_info(guc, &p); > > return 0; > } > -- > 2.28.0 >
Re: [Intel-gfx] [PATCH 40/47] drm/i915/guc: Enable GuC engine reset
On Thu, Jun 24, 2021 at 12:05:09AM -0700, Matthew Brost wrote: > From: John Harrison > > Clear the 'disable resets' flag to allow GuC to reset hung contexts > (detected via pre-emption timeout). > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > index 9fd3c911f5fb..d3e86ab7508f 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > @@ -81,8 +81,7 @@ static void guc_policies_init(struct guc_policies *policies) > { > policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US; > policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI; > - /* Disable automatic resets as not yet supported. */ > - policies->global_flags = GLOBAL_POLICY_DISABLE_ENGINE_RESET; > + policies->global_flags = 0; > policies->is_valid = 1; > } > > -- > 2.28.0 > > ___ > Intel-gfx mailing list > intel-...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [PATCH v14 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing
On Thu, Jun 24, 2021 at 11:56 PM Konrad Rzeszutek Wilk wrote: > > On Thu, Jun 24, 2021 at 10:10:51AM -0400, Qian Cai wrote: > > > > > > On 6/24/2021 7:48 AM, Will Deacon wrote: > > > Ok, diff below which attempts to tackle the offset issue I mentioned as > > > well. Qian Cai -- please can you try with these changes? > > > > This works fine. > > Cool. Let me squash this patch in #6 and rebase the rest of them. > > Claire, could you check the devel/for-linus-5.14 say by end of today to > double check that I didn't mess anything up please? I just submitted v15 here (https://lore.kernel.org/patchwork/cover/1451322/) in case it's helpful. I'll double check of course. Thanks for the efforts! > > Will, > > Thank you for generating the fix! I am going to run it on x86 and Xen > to make sure all is good (granted last time I ran devel/for-linus-5.14 > on that setup I didn't see any errors so I need to double check > I didn't do something silly like run a wrong kernel). > > > > > > > > > > Will > > > > > > --->8 > > > > > > diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h > > > index 175b6c113ed8..39284ff2a6cd 100644 > > > --- a/include/linux/swiotlb.h > > > +++ b/include/linux/swiotlb.h > > > @@ -116,7 +116,9 @@ static inline bool is_swiotlb_buffer(struct device > > > *dev, phys_addr_t paddr) > > > > > > static inline bool is_swiotlb_force_bounce(struct device *dev) > > > { > > > - return dev->dma_io_tlb_mem->force_bounce; > > > + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; > > > + > > > + return mem && mem->force_bounce; > > > } > > > > > > void __init swiotlb_exit(void); > > > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c > > > index 44be8258e27b..0ffbaae9fba2 100644 > > > --- a/kernel/dma/swiotlb.c > > > +++ b/kernel/dma/swiotlb.c > > > @@ -449,6 +449,7 @@ static int swiotlb_find_slots(struct device *dev, > > > phys_addr_t orig_addr, > > > dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); > > > unsigned int nslots = nr_slots(alloc_size), stride; 
> > > unsigned int index, wrap, count = 0, i; > > > + unsigned int offset = swiotlb_align_offset(dev, orig_addr); > > > unsigned long flags; > > > > > > BUG_ON(!nslots); > > > @@ -497,7 +498,7 @@ static int swiotlb_find_slots(struct device *dev, > > > phys_addr_t orig_addr, > > > for (i = index; i < index + nslots; i++) { > > > mem->slots[i].list = 0; > > > mem->slots[i].alloc_size = > > > - alloc_size - ((i - index) << IO_TLB_SHIFT); > > > + alloc_size - (offset + ((i - index) << > > > IO_TLB_SHIFT)); > > > } > > > for (i = index - 1; > > > io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && > > >
Re: [Intel-gfx] [PATCH 39/47] drm/i915/guc: Don't complain about reset races
On Thu, Jun 24, 2021 at 12:05:08AM -0700, Matthew Brost wrote: > From: John Harrison > > It is impossible to seal all race conditions of resets occurring > concurrent to other operations. At least, not without introducing > excessive mutex locking. Instead, don't complain if it occurs. In > particular, don't complain if trying to send a H2G during a reset. > Whatever the H2G was about should get redone once the reset is over. > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 5 - > drivers/gpu/drm/i915/gt/uc/intel_uc.c | 3 +++ > drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 ++ > 3 files changed, 9 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > index dd6177c8d75c..3b32755f892e 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > @@ -727,7 +727,10 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 > *action, u32 len, > int ret; > > if (unlikely(!ct->enabled)) { > - WARN(1, "Unexpected send: action=%#x\n", *action); > + struct intel_guc *guc = ct_to_guc(ct); > + struct intel_uc *uc = container_of(guc, struct intel_uc, guc); > + > + WARN(!uc->reset_in_progress, "Unexpected send: action=%#x\n", > *action); > return -ENODEV; > } > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c > b/drivers/gpu/drm/i915/gt/uc/intel_uc.c > index b523a8521351..77c1fe2ed883 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c > @@ -550,6 +550,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc) > { > struct intel_guc *guc = &uc->guc; > > + uc->reset_in_progress = true; > > /* Nothing to do if GuC isn't supported */ > if (!intel_uc_supports_guc(uc)) > @@ -579,6 +580,8 @@ void intel_uc_reset_finish(struct intel_uc *uc) > { > struct intel_guc *guc = &uc->guc; > > + uc->reset_in_progress = false; > + > /* 
Firmware expected to be running when this function is called */ > if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc)) > intel_guc_submission_reset_finish(guc); > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h > b/drivers/gpu/drm/i915/gt/uc/intel_uc.h > index eaa3202192ac..91315e3f1c58 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h > @@ -30,6 +30,8 @@ struct intel_uc { > > /* Snapshot of GuC log from last failed load */ > struct drm_i915_gem_object *load_err_log; > + > + bool reset_in_progress; > }; > > void intel_uc_init_early(struct intel_uc *uc); > -- > 2.28.0 > > ___ > Intel-gfx mailing list > intel-...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 04/47] drm/i915/guc: Add non blocking CTB send function
On Thu, Jun 24, 2021 at 04:48:32PM +0200, Michal Wajdeczko wrote: > > > On 24.06.2021 09:04, Matthew Brost wrote: > > Add non blocking CTB send function, intel_guc_send_nb. GuC submission > > will send CTBs in the critical path and does not need to wait for these > > CTBs to complete before moving on, hence the need for this new function. > > > > The non-blocking CTB now must have a flow control mechanism to ensure > > the buffer isn't overrun. A lazy spin wait is used as we believe the > > flow control condition should be rare with a properly sized buffer. > > > > The function, intel_guc_send_nb, is exported in this patch but unused. > > Several patches later in the series make use of this function. > > > > Signed-off-by: John Harrison > > Signed-off-by: Matthew Brost > > --- > > drivers/gpu/drm/i915/gt/uc/intel_guc.h| 12 +++- > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 77 +-- > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 3 +- > > 3 files changed, 82 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > index 4abc59f6f3cd..24b1df6ad4ae 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > @@ -74,7 +74,15 @@ static inline struct intel_guc *log_to_guc(struct > > intel_guc_log *log) > > static > > inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 > > len) > > { > > - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); > > + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); > > +} > > + > > +#define INTEL_GUC_SEND_NB BIT(31) > > hmm, this flag really belongs to intel_guc_ct_send() so it should be > defined as CTB flag near that function declaration > I can move this up a few lines. 
> > +static > > +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 > > len) > > +{ > > + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, > > +INTEL_GUC_SEND_NB); > > } > > > > static inline int > > @@ -82,7 +90,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const > > u32 *action, u32 len, > >u32 *response_buf, u32 response_buf_size) > > { > > return intel_guc_ct_send(&guc->ct, action, len, > > -response_buf, response_buf_size); > > +response_buf, response_buf_size, 0); > > } > > > > static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > index a17215920e58..c9a65d05911f 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > @@ -3,6 +3,11 @@ > > * Copyright © 2016-2019 Intel Corporation > > */ > > > > +#include > > +#include > > +#include > > +#include > > + > > #include "i915_drv.h" > > #include "intel_guc_ct.h" > > #include "gt/intel_gt.h" > > @@ -373,7 +378,7 @@ static void write_barrier(struct intel_guc_ct *ct) > > static int ct_write(struct intel_guc_ct *ct, > > const u32 *action, > > u32 len /* in dwords */, > > - u32 fence) > > + u32 fence, u32 flags) > > { > > struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > struct guc_ct_buffer_desc *desc = ctb->desc; > > @@ -421,9 +426,13 @@ static int ct_write(struct intel_guc_ct *ct, > > FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | > > FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); > > > > - hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > > - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > > -GUC_HXG_REQUEST_MSG_0_DATA0, action[0]); > > + hxg = (flags & INTEL_GUC_SEND_NB) ? 
> > + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | > > +FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | > > + GUC_HXG_EVENT_MSG_0_DATA0, action[0])) : > > + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > > +FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > > + GUC_HXG_REQUEST_MSG_0_DATA0, action[0])); > > or as we already switched to accept and return whole HXG messages in > guc_send_mmio() maybe we should do the same for CTB variant too and > instead of using extra flag just let caller to prepare proper HXG header > with HXG_EVENT type and then in CTB code just look at this type to make > decision which code path to use > Not sure I follow. Anyways could this be done in a follow up by you if want this change. > note that existing callers should not be impacted, as full HXG header > for the REQUEST message looks exactly the same as "action" code alone. > > > > > CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n", > > tail, 4, &header, 4, &hxg, 4 * (len - 1), &acti
[PATCH v15 12/12] of: Add plumbing for restricted DMA pool
If a device is not behind an IOMMU, we look up the device node and set up the restricted DMA when the restricted-dma-pool is presented. Signed-off-by: Claire Chang Tested-by: Stefano Stabellini Tested-by: Will Deacon --- drivers/of/address.c| 33 + drivers/of/device.c | 3 +++ drivers/of/of_private.h | 6 ++ 3 files changed, 42 insertions(+) diff --git a/drivers/of/address.c b/drivers/of/address.c index 73ddf2540f3f..cdf700fba5c4 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -1022,6 +1023,38 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) of_node_put(node); return ret; } + +int of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) +{ + struct device_node *node, *of_node = dev->of_node; + int count, i; + + count = of_property_count_elems_of_size(of_node, "memory-region", + sizeof(u32)); + /* +* If dev->of_node doesn't exist or doesn't contain memory-region, try +* the OF node having DMA configuration. +*/ + if (count <= 0) { + of_node = np; + count = of_property_count_elems_of_size( + of_node, "memory-region", sizeof(u32)); + } + + for (i = 0; i < count; i++) { + node = of_parse_phandle(of_node, "memory-region", i); + /* +* There might be multiple memory regions, but only one +* restricted-dma-pool region is allowed. 
+*/ + if (of_device_is_compatible(node, "restricted-dma-pool") && + of_device_is_available(node)) + return of_reserved_mem_device_init_by_idx(dev, of_node, + i); + } + + return 0; +} #endif /* CONFIG_HAS_DMA */ /** diff --git a/drivers/of/device.c b/drivers/of/device.c index 6cb86de404f1..e68316836a7a 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -165,6 +165,9 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, arch_setup_dma_ops(dev, dma_start, size, iommu, coherent); + if (!iommu) + return of_dma_set_restricted_buffer(dev, np); + return 0; } EXPORT_SYMBOL_GPL(of_dma_configure_id); diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index d9e6a324de0a..25cebbed5f02 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -161,12 +161,18 @@ struct bus_dma_region; #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA) int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map); +int of_dma_set_restricted_buffer(struct device *dev, struct device_node *np); #else static inline int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) { return -ENODEV; } +static inline int of_dma_set_restricted_buffer(struct device *dev, + struct device_node *np) +{ + return -ENODEV; +} #endif #endif /* _LINUX_OF_PRIVATE_H */ -- 2.32.0.288.g62a8d224e6-goog
[PATCH v15 11/12] dt-bindings: of: Add restricted DMA pool
Introduce the new compatible string, restricted-dma-pool, for restricted DMA. One can specify the address and length of the restricted DMA memory region by restricted-dma-pool in the reserved-memory node. Signed-off-by: Claire Chang Tested-by: Stefano Stabellini Tested-by: Will Deacon --- .../reserved-memory/reserved-memory.txt | 36 +-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt index e8d3096d922c..39b5f4c5a511 100644 --- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt +++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt @@ -51,6 +51,23 @@ compatible (optional) - standard definition used as a shared pool of DMA buffers for a set of devices. It can be used by an operating system to instantiate the necessary pool management subsystem if necessary. +- restricted-dma-pool: This indicates a region of memory meant to be + used as a pool of restricted DMA buffers for a set of devices. The + memory region would be the only region accessible to those devices. + When using this, the no-map and reusable properties must not be set, + so the operating system can create a virtual mapping that will be used + for synchronization. The main purpose for restricted DMA is to + mitigate the lack of DMA access control on systems without an IOMMU, + which could result in the DMA accessing the system memory at + unexpected times and/or unexpected addresses, possibly leading to data + leakage or corruption. The feature on its own provides a basic level + of protection against the DMA overwriting buffer contents at + unexpected times. However, to protect against general data leakage and + system memory corruption, the system needs to provide way to lock down + the memory access, e.g., MPU. 
Note that since coherent allocation + needs remapping, one must set up another device coherent pool by + shared-dma-pool and use dma_alloc_from_dev_coherent instead for atomic + coherent allocation. - vendor specific string in the form ,[-] no-map (optional) - empty property - Indicates the operating system must not create a virtual mapping @@ -85,10 +102,11 @@ memory-region-names (optional) - a list of names, one for each corresponding Example --- -This example defines 3 contiguous regions are defined for Linux kernel: +This example defines 4 contiguous regions for Linux kernel: one default of all device drivers (named linux,cma@7200 and 64MiB in size), -one dedicated to the framebuffer device (named framebuffer@7800, 8MiB), and -one for multimedia processing (named multimedia-memory@7700, 64MiB). +one dedicated to the framebuffer device (named framebuffer@7800, 8MiB), +one for multimedia processing (named multimedia-memory@7700, 64MiB), and +one for restricted dma pool (named restricted_dma_reserved@0x5000, 64MiB). / { #address-cells = <1>; @@ -120,6 +138,11 @@ one for multimedia processing (named multimedia-memory@7700, 64MiB). compatible = "acme,multimedia-memory"; reg = <0x7700 0x400>; }; + + restricted_dma_reserved: restricted_dma_reserved { + compatible = "restricted-dma-pool"; + reg = <0x5000 0x400>; + }; }; /* ... */ @@ -138,4 +161,11 @@ one for multimedia processing (named multimedia-memory@7700, 64MiB). memory-region = <&multimedia_reserved>; /* ... */ }; + + pcie_device: pcie_device@0,0 { + reg = <0x8301 0x0 0x 0x0 0x0010 + 0x8301 0x0 0x0010 0x0 0x0010>; + memory-region = <&restricted_dma_reserved>; + /* ... */ + }; }; -- 2.32.0.288.g62a8d224e6-goog
[PATCH v15 10/12] swiotlb: Add restricted DMA pool initialization
Add the initialization function to create restricted DMA pools from matching reserved-memory nodes. Regardless of swiotlb setting, the restricted DMA pool is preferred if available. The restricted DMA pools provide a basic level of protection against the DMA overwriting buffer contents at unexpected times. However, to protect against general data leakage and system memory corruption, the system needs to provide a way to lock down the memory access, e.g., MPU. Signed-off-by: Claire Chang Reviewed-by: Christoph Hellwig Tested-by: Stefano Stabellini Tested-by: Will Deacon --- include/linux/swiotlb.h | 3 +- kernel/dma/Kconfig | 14 kernel/dma/swiotlb.c| 76 + 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 3b9454d1e498..39284ff2a6cd 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -73,7 +73,8 @@ extern enum swiotlb_force swiotlb_force; * range check to see if the memory was in fact allocated by this * API. * @nslabs:The number of IO TLB blocks (in groups of 64) between @start and - * @end. This is command line adjustable via setup_io_tlb_npages. + * @end. For default swiotlb, this is command line adjustable via + * setup_io_tlb_npages. * @used: The number of used IO TLB block. * @list: The free list describing the number of free entries available * from each index. diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 77b405508743..3e961dc39634 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -80,6 +80,20 @@ config SWIOTLB bool select NEED_DMA_MAP_STATE +config DMA_RESTRICTED_POOL + bool "DMA Restricted Pool" + depends on OF && OF_RESERVED_MEM + select SWIOTLB + help + This enables support for restricted DMA pools which provide a level of + DMA memory protection on systems with limited hardware protection + capabilities, such as those lacking an IOMMU. + + For more information see + + and . + If unsure, say "n". 
+ # # Should be selected if we can mmap non-coherent mappings to userspace. # The only thing that is really required is a way to set an uncached bit diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 6a7c6e30eb4b..3baf49c9b766 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -39,6 +39,13 @@ #ifdef CONFIG_DEBUG_FS #include #endif +#ifdef CONFIG_DMA_RESTRICTED_POOL +#include +#include +#include +#include +#include +#endif #include #include @@ -737,4 +744,73 @@ bool swiotlb_free(struct device *dev, struct page *page, size_t size) return true; } +static int rmem_swiotlb_device_init(struct reserved_mem *rmem, + struct device *dev) +{ + struct io_tlb_mem *mem = rmem->priv; + unsigned long nslabs = rmem->size >> IO_TLB_SHIFT; + + /* +* Since multiple devices can share the same pool, the private data, +* io_tlb_mem struct, will be initialized by the first device attached +* to it. +*/ + if (!mem) { + mem = kzalloc(struct_size(mem, slots, nslabs), GFP_KERNEL); + if (!mem) + return -ENOMEM; + + set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), +rmem->size >> PAGE_SHIFT); + swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false); + mem->force_bounce = true; + mem->for_alloc = true; + + rmem->priv = mem; + + if (IS_ENABLED(CONFIG_DEBUG_FS)) { + mem->debugfs = + debugfs_create_dir(rmem->name, debugfs_dir); + swiotlb_create_debugfs_files(mem); + } + } + + dev->dma_io_tlb_mem = mem; + + return 0; +} + +static void rmem_swiotlb_device_release(struct reserved_mem *rmem, + struct device *dev) +{ + dev->dma_io_tlb_mem = io_tlb_default_mem; +} + +static const struct reserved_mem_ops rmem_swiotlb_ops = { + .device_init = rmem_swiotlb_device_init, + .device_release = rmem_swiotlb_device_release, +}; + +static int __init rmem_swiotlb_setup(struct reserved_mem *rmem) +{ + unsigned long node = rmem->fdt_node; + + if (of_get_flat_dt_prop(node, "reusable", NULL) || + of_get_flat_dt_prop(node, "linux,cma-default", NULL) || + of_get_flat_dt_prop(node, 
"linux,dma-default", NULL) || + of_get_flat_dt_prop(node, "no-map", NULL)) + return -EINVAL; + + if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base { + pr_err("Restricted DMA pool must be accessible within the linear mapping."); + return -EINVAL
[PATCH v15 09/12] swiotlb: Add restricted DMA alloc/free support
Add the functions, swiotlb_{alloc,free} and is_swiotlb_for_alloc to support the memory allocation from restricted DMA pool. The restricted DMA pool is preferred if available. Note that since coherent allocation needs remapping, one must set up another device coherent pool by shared-dma-pool and use dma_alloc_from_dev_coherent instead for atomic coherent allocation. Signed-off-by: Claire Chang Reviewed-by: Christoph Hellwig Tested-by: Stefano Stabellini Tested-by: Will Deacon Acked-by: Stefano Stabellini --- include/linux/swiotlb.h | 26 ++ kernel/dma/direct.c | 49 +++-- kernel/dma/swiotlb.c| 38 ++-- 3 files changed, 99 insertions(+), 14 deletions(-) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index da348671b0d5..3b9454d1e498 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -85,6 +85,7 @@ extern enum swiotlb_force swiotlb_force; * @debugfs: The dentry to debugfs. * @late_alloc:%true if allocated using the page allocator * @force_bounce: %true if swiotlb bouncing is forced + * @for_alloc: %true if the pool is used for memory allocation */ struct io_tlb_mem { phys_addr_t start; @@ -96,6 +97,7 @@ struct io_tlb_mem { struct dentry *debugfs; bool late_alloc; bool force_bounce; + bool for_alloc; struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; @@ -158,4 +160,28 @@ static inline void swiotlb_adjust_size(unsigned long size) extern void swiotlb_print_info(void); extern void swiotlb_set_max_segment(unsigned int); +#ifdef CONFIG_DMA_RESTRICTED_POOL +struct page *swiotlb_alloc(struct device *dev, size_t size); +bool swiotlb_free(struct device *dev, struct page *page, size_t size); + +static inline bool is_swiotlb_for_alloc(struct device *dev) +{ + return dev->dma_io_tlb_mem->for_alloc; +} +#else +static inline struct page *swiotlb_alloc(struct device *dev, size_t size) +{ + return NULL; +} +static inline bool swiotlb_free(struct device *dev, struct page *page, + size_t size) +{ + return false; +} +static inline bool 
is_swiotlb_for_alloc(struct device *dev) +{ + return false; +} +#endif /* CONFIG_DMA_RESTRICTED_POOL */ + #endif /* __LINUX_SWIOTLB_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index a92465b4eb12..2de33e5d302b 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -75,6 +75,15 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } +static void __dma_direct_free_pages(struct device *dev, struct page *page, + size_t size) +{ + if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) && + swiotlb_free(dev, page, size)) + return; + dma_free_contiguous(dev, page, size); +} + static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp) { @@ -86,6 +95,16 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_limit); + if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) && + is_swiotlb_for_alloc(dev)) { + page = swiotlb_alloc(dev, size); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + __dma_direct_free_pages(dev, page, size); + return NULL; + } + return page; + } + page = dma_alloc_contiguous(dev, size, gfp); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { dma_free_contiguous(dev, page, size); @@ -142,7 +161,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, gfp |= __GFP_NOWARN; if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); if (!page) return NULL; @@ -155,18 +174,23 @@ void *dma_direct_alloc(struct device *dev, size_t size, } if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !dev_is_dma_coherent(dev)) + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) && + !is_swiotlb_for_alloc(dev)) return 
arch_dma_alloc(dev, size, dma_handle, gfp, attrs); /* * Remapping or decrypting memory may block. If either is required and * we can't block, allocate the memory from the atomic pools. +* If restricted DMA (i.e., is_swiotlb_for_alloc) i
[PATCH v15 07/12] swiotlb: Move alloc_size to swiotlb_find_slots
Rename find_slots to swiotlb_find_slots and move the maintenance of alloc_size to it for better code reusability later. Signed-off-by: Claire Chang Reviewed-by: Christoph Hellwig Tested-by: Stefano Stabellini Tested-by: Will Deacon --- kernel/dma/swiotlb.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 0d294bbf274c..b41d16e92cf6 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -432,8 +432,8 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index) * Find a suitable number of IO TLB entries size that will fit this request and * allocate a buffer from that IO TLB pool. */ -static int find_slots(struct device *dev, phys_addr_t orig_addr, - size_t alloc_size) +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, + size_t alloc_size) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned long boundary_mask = dma_get_seg_boundary(dev); @@ -444,6 +444,7 @@ static int find_slots(struct device *dev, phys_addr_t orig_addr, dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); unsigned int nslots = nr_slots(alloc_size), stride; unsigned int index, wrap, count = 0, i; + unsigned int offset = swiotlb_align_offset(dev, orig_addr); unsigned long flags; BUG_ON(!nslots); @@ -488,8 +489,11 @@ static int find_slots(struct device *dev, phys_addr_t orig_addr, return -1; found: - for (i = index; i < index + nslots; i++) + for (i = index; i < index + nslots; i++) { mem->slots[i].list = 0; + mem->slots[i].alloc_size = + alloc_size - (offset + ((i - index) << IO_TLB_SHIFT)); + } for (i = index - 1; io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list; i--) @@ -530,7 +534,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, return (phys_addr_t)DMA_MAPPING_ERROR; } - index = find_slots(dev, orig_addr, alloc_size + offset); + index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset); if (index == -1) { if (!(attrs & 
DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, @@ -544,11 +548,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * This is needed when we sync the memory. Then we sync the buffer if * needed. */ - for (i = 0; i < nr_slots(alloc_size + offset); i++) { + for (i = 0; i < nr_slots(alloc_size + offset); i++) mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); - mem->slots[index + i].alloc_size = - alloc_size - (i << IO_TLB_SHIFT); - } tlb_addr = slot_addr(mem->start, index) + offset; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) -- 2.32.0.288.g62a8d224e6-goog