[PATCH] drm/msm/dp: Add missing drm_device backpointer
'6cba3fe43341 ("drm/dp: Add backpointer to drm_device in drm_dp_aux")' introduced a mandatory drm_device backpointer in struct drm_dp_aux, but missed the msm DP driver. Fix this. Fixes: 6cba3fe43341 ("drm/dp: Add backpointer to drm_device in drm_dp_aux") Signed-off-by: Bjorn Andersson --- drivers/gpu/drm/msm/dp/dp_aux.c | 3 ++- drivers/gpu/drm/msm/dp/dp_aux.h | 2 +- drivers/gpu/drm/msm/dp/dp_display.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index 4a3293b590b0..88659ed200b9 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -441,7 +441,7 @@ void dp_aux_deinit(struct drm_dp_aux *dp_aux) dp_catalog_aux_enable(aux->catalog, false); } -int dp_aux_register(struct drm_dp_aux *dp_aux) +int dp_aux_register(struct drm_dp_aux *dp_aux, struct drm_device *drm_dev) { struct dp_aux_private *aux; int ret; @@ -455,6 +455,7 @@ int dp_aux_register(struct drm_dp_aux *dp_aux) aux->dp_aux.name = "dpu_dp_aux"; aux->dp_aux.dev = aux->dev; + aux->dp_aux.drm_dev = drm_dev; aux->dp_aux.transfer = dp_aux_transfer; ret = drm_dp_aux_register(&aux->dp_aux); if (ret) { diff --git a/drivers/gpu/drm/msm/dp/dp_aux.h b/drivers/gpu/drm/msm/dp/dp_aux.h index 0728cc09c9ec..7ef0d83b483a 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.h +++ b/drivers/gpu/drm/msm/dp/dp_aux.h @@ -9,7 +9,7 @@ #include "dp_catalog.h" #include -int dp_aux_register(struct drm_dp_aux *dp_aux); +int dp_aux_register(struct drm_dp_aux *dp_aux, struct drm_device *drm_dev); void dp_aux_unregister(struct drm_dp_aux *dp_aux); void dp_aux_isr(struct drm_dp_aux *dp_aux); void dp_aux_init(struct drm_dp_aux *dp_aux); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index c26562bd85fe..2f0a5c13f251 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -259,7 +259,7 @@ static int dp_display_bind(struct device *dev, struct device *master, 
return rc; } - rc = dp_aux_register(dp->aux); + rc = dp_aux_register(dp->aux, drm); if (rc) { DRM_ERROR("DRM DP AUX register failed\n"); return rc; -- 2.29.2
Re: [PATCH 11/17] drm/msm/dpu: drop src_split and multirect check from dpu_crtc_atomic_check
Hi Dmitry, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on next-20210624] [also build test WARNING on v5.13-rc7] [cannot apply to linus/master v5.13-rc7 v5.13-rc6 v5.13-rc5] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Dmitry-Baryshkov/drm-msm-dpu-switch-dpu_plane-to-be-virtual/20210624-225947 base:2a8927f0efb6fb34b9d11dab3bd3f018e866d36d config: arm-defconfig (attached as .config) compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/3842e184f54916b9d22989d840a70bfb0bfebf10 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Dmitry-Baryshkov/drm-msm-dpu-switch-dpu_plane-to-be-virtual/20210624-225947 git checkout 3842e184f54916b9d22989d840a70bfb0bfebf10 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All warnings (new ones prefixed by >>): drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c: In function 'dpu_crtc_atomic_check': >> drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c:898:23: warning: variable >> 'mixer_width' set but not used [-Wunused-but-set-variable] 898 | int cnt = 0, rc = 0, mixer_width = 0, i; | ^~~ vim +/mixer_width +898 drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c 884 885 static int dpu_crtc_atomic_check(struct drm_crtc *crtc, 886 struct drm_atomic_state *state) 887 { 888 struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, 889 crtc); 890 struct dpu_crtc *dpu_crtc = to_dpu_crtc(crtc); 891 struct dpu_crtc_state *cstate = 
to_dpu_crtc_state(crtc_state); 892 struct plane_state *pstates; 893 894 const struct drm_plane_state *pstate; 895 struct drm_plane *plane; 896 struct drm_display_mode *mode; 897 > 898 int cnt = 0, rc = 0, mixer_width = 0, i; 899 900 struct drm_rect crtc_rect = { 0 }; 901 902 pstates = kzalloc(sizeof(*pstates) * DPU_STAGE_MAX * 4, GFP_KERNEL); 903 904 if (!crtc_state->enable || !crtc_state->active) { 905 DRM_DEBUG_ATOMIC("crtc%d -> enable %d, active %d, skip atomic_check\n", 906 crtc->base.id, crtc_state->enable, 907 crtc_state->active); 908 memset(&cstate->new_perf, 0, sizeof(cstate->new_perf)); 909 goto end; 910 } 911 912 mode = &crtc_state->adjusted_mode; 913 DRM_DEBUG_ATOMIC("%s: check\n", dpu_crtc->name); 914 915 /* force a full mode set if active state changed */ 916 if (crtc_state->active_changed) 917 crtc_state->mode_changed = true; 918 919 if (cstate->num_mixers) { 920 mixer_width = mode->hdisplay / cstate->num_mixers; 921 922 _dpu_crtc_setup_lm_bounds(crtc, crtc_state); 923 } 924 925 crtc_rect.x2 = mode->hdisplay; 926 crtc_rect.y2 = mode->vdisplay; 927 928 /* get plane state for all drm planes associated with crtc state */ 929 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { 930 struct drm_rect dst, clip = crtc_rect; 931 932 if (IS_ERR_OR_NULL(pstate)) { 933 rc = PTR_ERR(pstate); 934 DPU_ERROR("%s: failed to get plane%d state, %d\n", 935 dpu_crtc->name, plane->base.id, rc); 936 goto end; 937 } 938 if (cnt >= DPU_STAGE_MAX * 4) 939 continue; 940 941 pstates[cnt].dpu_pstate = to_dpu_plane_state(pstate); 942 pstates[cnt].drm_pstate = pstate; 943 pstates[cnt].stage = pstate->normalized_zpos; 944 945 dpu_plane_clear_multirect(pstate); 946 947 cnt++; 948 949 dst = d
Re: [PATCH v5 3/5] drm/msm: Improve the a6xx page fault handler
On Thu 10 Jun 16:44 CDT 2021, Rob Clark wrote: [..] > diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c > index 50d881794758..6975b95c3c29 100644 > --- a/drivers/gpu/drm/msm/msm_iommu.c > +++ b/drivers/gpu/drm/msm/msm_iommu.c > @@ -211,8 +211,17 @@ static int msm_fault_handler(struct iommu_domain > *domain, struct device *dev, > unsigned long iova, int flags, void *arg) > { > struct msm_iommu *iommu = arg; > + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(iommu->base.dev); > + struct adreno_smmu_fault_info info, *ptr = NULL; > + > + if (adreno_smmu->get_fault_info) { This seemed reasonable when I read it last time, but I didn't realize that the msm_fault_handler() is installed for all msm_iommu instances. So while we're trying to recover from the boot splash and setup the new framebuffer we end up here with iommu->base.dev being the mdss device. Naturally drvdata of mdss is not a struct adreno_smmu_priv. > + adreno_smmu->get_fault_info(adreno_smmu->cookie, &info); So here we just jump straight out into hyperspace, never to return. Not sure how to wire this up to avoid the problem, but right now I don't think we can boot any device with a boot splash. Regards, Bjorn > + ptr = &info; > + } > + > if (iommu->base.handler) > - return iommu->base.handler(iommu->base.arg, iova, flags); > + return iommu->base.handler(iommu->base.arg, iova, flags, ptr); > + > pr_warn_ratelimited("*** fault: iova=%16lx, flags=%d\n", iova, flags); > return 0; > }
[PATCH] drm/amdgpu: use kvcalloc for entry->entries.
kvmalloc_array + __GFP_ZERO is the same as kvcalloc. Signed-off-by: huqiqiao --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9acee4a5b2ba..5a012321d09e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -909,8 +909,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, num_entries = amdgpu_vm_num_entries(adev, cursor->level); entry->entries = kvmalloc_array(num_entries, - sizeof(*entry->entries), - GFP_KERNEL | __GFP_ZERO); + sizeof(*entry->entries), GFP_KERNEL); if (!entry->entries) return -ENOMEM; } -- 2.11.0
[PATCH] drm/amdgpu: use kvcalloc for entry->entries
kvmalloc_array + __GFP_ZERO is the same as kvcalloc. Signed-off-by: huqiqiao --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b8c31e3469c0..5cadfadc625f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -909,7 +909,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, num_entries = amdgpu_vm_num_entries(adev, cursor->level); entry->entries = kvcalloc(num_entries, - sizeof(*entry->entries), GFP_KERNEL); + sizeof(*entry->entries), GFP_KERNEL); if (!entry->entries) return -ENOMEM; } -- 2.11.0
Re: [PATCH 0/4] [RFC] Support virtual DRM
On 2021/06/23 20:41, Pekka Paalanen wrote: > On Wed, 23 Jun 2021 18:22:47 +0900 > Esaki Tomohito wrote: > >> On 2021/06/23 17:39, Pekka Paalanen wrote: >>> On Wed, 23 Jun 2021 15:56:05 +0900 >>> Esaki Tomohito wrote: >>> Hi, Thank you all for your comments. On 2021/06/22 17:12, Pekka Paalanen wrote: > On Tue, 22 Jun 2021 13:03:39 +0900 > Esaki Tomohito wrote: > >> Hi, Enrico Weigelt >> Thank you for reply. >> >> On 2021/06/22 1:05, Enrico Weigelt, metux IT consult wrote: >>> On 21.06.21 08:27, Tomohito Esaki wrote: >>> >>> Hi, >>> Virtual DRM splits the overlay planes of a display controller into multiple virtual devices to allow each plane to be accessed by each process. This makes it possible to overlay images output from multiple processes on a display. For example, one process displays the camera image without compositor while another process overlays the UI. >>> >>> Are you attempting to create an simple in-kernel compositor ? >> >> I think the basic idea is the same as DRMlease. > > Hi, > > indeed. Why not use DRM leases instead? > In this use case, I understand that this is not possible with DRM lease, am I wrong? I understand that it’s not possible to lease a plane and update planes on the same output independently from different processes in current DRM lease. If this is correct, what do you think of adding support for plane leases to the DRM lease to handle this case? >>> >>> Hi, >>> >>> I would love to see support added for leasing individual planes, >>> especially to replace the virtual DRM proposal which seems to be >>> eradicating everything that atomic modesetting and nuclear pageflip >>> have built over the many years. >>> >>> However, please note that "on the same output independently" is >>> physically impossible. Semantically, the planes define what a CRTC >>> scans out, and the CRTC defines the scanout timings. Therefore it is not >>> possible to update individual planes independently, they will all >>> always share the timings of the CRTC. 
>>> >>> That combined with KMS not allowing multiple updates to be queued at >>> the same time for the same CRTC (atomic commits and legacy pageflips >>> returning EBUSY) makes the plane updates very much inter-dependent. >>> >>> If you want to avoid EBUSY and have planes update on the vblank you >>> intended, you really need a userspace compositor to pull everything >>> together *before* submitting anything to the kernel. >> >> Hi, >> >> Thank you for your comments and advice. >> I will consider leasing a plane. > > Hi, > > I wish you considered a userspace compositor first, once more, with > passion. > > It does not need to be Weston, and it does not need to use Wayland. > Just a userspace daemon that owns the whole display device and somehow > talks to whatever else wants stuff on screen. > > I have not seen any evidence that leasing individual planes would do > you any good. I can easily see it doing you harm. I'm only saying that > it would be better than the virtual DRM proposal if you absolutely have > to go there. Please, consider not going there at all. > > "On the same output independently" is not possible for the very simple > reason that the pixel data needs to be streamed serially to a monitor. > Hi, Thank you for your advice. Once again, I'll consider a userspace compositor first. Best regards Esaki
Re:Re: [PATCH] drm/amdgpu:use kvcalloc instead of kvmalloc_array
OK, I'll revise it and submit it again. George. From: "Christian König "To: "huqiqiao ","airlied ","daniel "CC: "dri-devel ","amd-gfx ","linux-kernel "Sent: 2021-06-24 21:14Subject: Re: [PATCH] drm/amdgpu:use kvcalloc instead of kvmalloc_array Am 23.06.21 um 11:12 schrieb huqiqiao: > kvmalloc_array + __GFP_ZERO is the same with kvcalloc. > > Signed-off-by: huqiqiao > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 ++--- > 1 file changed, 2 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 9acee4a5b2ba..50edc73525b0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -908,9 +908,8 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, > unsigned num_entries; > > num_entries = amdgpu_vm_num_entries(adev, cursor->level); > - entry->entries = kvmalloc_array(num_entries, > - sizeof(*entry->entries), > - GFP_KERNEL | __GFP_ZERO); > + entry->entries = kvcalloc(num_entries, > + sizeof(*entry->entries), GFP_KERNEL); Sounds like a good idea in general, but the indentation on the second line seems to be off. Christian. > if (!entry->entries) > return -ENOMEM; > }
Re: [Intel-gfx] [PATCH 44/47] drm/i915/guc: Connect reset modparam updates to GuC policy flags
On Thu, Jun 24, 2021 at 12:05:13AM -0700, Matthew Brost wrote: > From: John Harrison > > Changing the reset module parameter has no effect on a running GuC. > The corresponding entry in the ADS must be updated and then the GuC > informed via a Host2GuC message. > > The new debugfs interface to module parameters allows this to happen. > However, connecting the parameter data address back to anything useful > is messy. One option would be to pass a new private data structure > address through instead of just the parameter pointer. However, that > means having a new (and different) data structure for each parameter > and a new (and different) write function for each parameter. This > method keeps everything generic by instead using a string lookup on > the directory entry name. > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- > drivers/gpu/drm/i915/i915_debugfs_params.c | 31 ++ > 2 files changed, 32 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > index 2ad5fcd4e1b7..c6d0b762d82c 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > @@ -99,7 +99,7 @@ static int guc_action_policies_update(struct intel_guc > *guc, u32 policy_offset) > policy_offset > }; > > - return intel_guc_send(guc, action, ARRAY_SIZE(action)); > + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, > true); > } > > int intel_guc_global_policies_update(struct intel_guc *guc) > diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c > b/drivers/gpu/drm/i915/i915_debugfs_params.c > index 4e2b077692cb..8ecd8b42f048 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs_params.c > +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c > @@ -6,9 +6,20 @@ > #include > > #include "i915_debugfs_params.h" > +#include "gt/intel_gt.h" > +#include "gt/uc/intel_guc.h" > #include "i915_drv.h" > 
#include "i915_params.h" > > +#define MATCH_DEBUGFS_NODE_NAME(_file, _name) > (strcmp((_file)->f_path.dentry->d_name.name, (_name)) == 0) > + > +#define GET_I915(i915, name, ptr)\ > + do {\ > + struct i915_params *params; \ > + params = container_of(((void *) (ptr)), typeof(*params), name); > \ > + (i915) = container_of(params, typeof(*(i915)), params); \ > + } while(0) > + > /* int param */ > static int i915_param_int_show(struct seq_file *m, void *data) > { > @@ -24,6 +35,16 @@ static int i915_param_int_open(struct inode *inode, struct > file *file) > return single_open(file, i915_param_int_show, inode->i_private); > } > > +static int notify_guc(struct drm_i915_private *i915) > +{ > + int ret = 0; > + > + if (intel_uc_uses_guc_submission(&i915->gt.uc)) > + ret = intel_guc_global_policies_update(&i915->gt.uc.guc); > + > + return ret; > +} > + > static ssize_t i915_param_int_write(struct file *file, > const char __user *ubuf, size_t len, > loff_t *offp) > @@ -81,8 +102,10 @@ static ssize_t i915_param_uint_write(struct file *file, >const char __user *ubuf, size_t len, >loff_t *offp) > { > + struct drm_i915_private *i915; > struct seq_file *m = file->private_data; > unsigned int *value = m->private; > + unsigned int old = *value; > int ret; > > ret = kstrtouint_from_user(ubuf, len, 0, value); > @@ -95,6 +118,14 @@ static ssize_t i915_param_uint_write(struct file *file, > *value = b; > } > > + if (!ret && MATCH_DEBUGFS_NODE_NAME(file, "reset")) { > + GET_I915(i915, reset, value); We might want to make this into a macro in case we need to update more than just "reset" with the GuC going forward but that is not a blocker. With that: Reviewed-by: Matthew Brost > + > + ret = notify_guc(i915); > + if (ret) > + *value = old; > + } > + > return ret ?: len; > } > > -- > 2.28.0 > > ___ > Intel-gfx mailing list > intel-...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 43/47] drm/i915/guc: Hook GuC scheduling policies up
On Thu, Jun 24, 2021 at 12:05:12AM -0700, Matthew Brost wrote: > From: John Harrison > > Use the official driver default scheduling policies for configuring > the GuC scheduler rather than a bunch of hardcoded values. > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost > Cc: Jose Souza > --- > drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + > drivers/gpu/drm/i915/gt/uc/intel_guc.h| 2 + > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 44 ++- > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 +++-- > 4 files changed, 53 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h > b/drivers/gpu/drm/i915/gt/intel_engine_types.h > index 0ceffa2be7a7..37db857bb56c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > @@ -455,6 +455,7 @@ struct intel_engine_cs { > #define I915_ENGINE_IS_VIRTUAL BIT(5) > #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) > #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) > +#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8) > unsigned int flags; > > /* > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > index c38365cd5fab..905ecbc7dbe3 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > @@ -270,6 +270,8 @@ int intel_guc_engine_failure_process_msg(struct intel_guc > *guc, > > void intel_guc_find_hung_context(struct intel_engine_cs *engine); > > +int intel_guc_global_policies_update(struct intel_guc *guc); > + > void intel_guc_submission_reset_prepare(struct intel_guc *guc); > void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); > void intel_guc_submission_reset_finish(struct intel_guc *guc); > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > index d3e86ab7508f..2ad5fcd4e1b7 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > @@ -77,14 +77,54 @@ static u32 guc_ads_blob_size(struct intel_guc *guc) > guc_ads_private_data_size(guc); > } > > -static void guc_policies_init(struct guc_policies *policies) > +static void guc_policies_init(struct intel_guc *guc, struct guc_policies > *policies) > { > + struct intel_gt *gt = guc_to_gt(guc); > + struct drm_i915_private *i915 = gt->i915; > + > policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US; > policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI; > + > policies->global_flags = 0; > + if (i915->params.reset < 2) > + policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; > + > policies->is_valid = 1; > } > > +static int guc_action_policies_update(struct intel_guc *guc, u32 > policy_offset) > +{ > + u32 action[] = { > + INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, > + policy_offset > + }; > + > + return intel_guc_send(guc, action, ARRAY_SIZE(action)); > +} > + > +int intel_guc_global_policies_update(struct intel_guc *guc) > +{ > + struct __guc_ads_blob *blob = guc->ads_blob; > + struct intel_gt *gt = guc_to_gt(guc); > + intel_wakeref_t wakeref; > + int ret; > + > + if (!blob) > + return -ENOTSUPP; > + > + GEM_BUG_ON(!blob->ads.scheduler_policies); > + > + guc_policies_init(guc, &blob->policies); > + > + if (!intel_guc_is_ready(guc)) > + return 0; > + > + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) > + ret = guc_action_policies_update(guc, > blob->ads.scheduler_policies); > + > + return ret; > +} > + > static void guc_mapping_table_init(struct intel_gt *gt, > struct guc_gt_system_info *system_info) > { > @@ -281,7 +321,7 @@ static void __guc_ads_init(struct intel_guc *guc) > u8 engine_class, guc_class; > > /* GuC scheduling policies */ > - guc_policies_init(&blob->policies); > + guc_policies_init(guc, &blob->policies); > > /* >* GuC expects a per-engine-class context image and size > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 6188189314d5..a427336ce916 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -873,6 +873,7 @@ void intel_guc_submission_reset_finish(struct intel_guc > *guc) > GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); > atomic_set(&guc->outstanding_submission_g2h, 0); > > + intel_guc_global_policies_update(guc); > enable_submission(guc); > intel_gt_unpark_heartbeats(guc_to_gt(guc)); > } > @@ -1161,8 +1162,12 @@ static void guc_context_policy_init(struct > intel_engine_cs *engine, > { > desc->policy_flags = 0; > > - desc->execution_
Re: [PATCH v15 00/12] Restricted DMA
On Fri, Jun 25, 2021 at 3:20 AM Konrad Rzeszutek Wilk wrote: > > On Thu, Jun 24, 2021 at 11:55:14PM +0800, Claire Chang wrote: > > This series implements mitigations for lack of DMA access control on > > systems without an IOMMU, which could result in the DMA accessing the > > system memory at unexpected times and/or unexpected addresses, possibly > > leading to data leakage or corruption. > > > > For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is > > not behind an IOMMU. As PCI-e, by design, gives the device full access to > > system memory, a vulnerability in the Wi-Fi firmware could easily escalate > > to a full system exploit (remote wifi exploits: [1a], [1b] that shows a > > full chain of exploits; [2], [3]). > > > > To mitigate the security concerns, we introduce restricted DMA. Restricted > > DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a > > specially allocated region and does memory allocation from the same region. > > The feature on its own provides a basic level of protection against the DMA > > overwriting buffer contents at unexpected times. However, to protect > > against general data leakage and system memory corruption, the system needs > > to provide a way to restrict the DMA to a predefined memory region (this is > > usually done at firmware level, e.g. MPU in ATF on some ARM platforms [4]). 
> > > > [1a] > > https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html > > [1b] > > https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html > > [2] https://blade.tencent.com/en/advisories/qualpwn/ > > [3] > > https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/ > > [4] > > https://github.com/ARM-software/arm-trusted-firmware/blob/master/plat/mediatek/mt8183/drivers/emi_mpu/emi_mpu.c#L132 > > > > v15: > > - Apply Will's diff > > (https://lore.kernel.org/patchwork/patch/1448957/#1647521) > > to fix the crash reported by Qian. > > - Add Stefano's Acked-by tag for patch 01/12 from v14 > > That all should be now be on > > https://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git/ > devel/for-linus-5.14 (and linux-next) > devel/for-linus-5.14 looks good. Thanks!
Re: [PATCH 0/6] KVM: Remove uses of struct page from x86 and arm64 MMU
Excerpts from Paolo Bonzini's message of June 25, 2021 1:35 am: > On 24/06/21 14:57, Nicholas Piggin wrote: >> KVM: Fix page ref underflow for regions with valid but non-refcounted pages > > It doesn't really fix the underflow, it disallows mapping them in the > first place. Since in principle things can break, I'd rather be > explicit, so let's go with "KVM: do not allow mapping valid but > non-reference-counted pages". > >> It's possible to create a region which maps valid but non-refcounted >> pages (e.g., tail pages of non-compound higher order allocations). These >> host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family >> of APIs, which take a reference to the page, which takes it from 0 to 1. >> When the reference is dropped, this will free the page incorrectly. >> >> Fix this by only taking a reference on the page if it was non-zero, > > s/on the page/on valid pages/ (makes clear that invalid pages are fine > without refcounting). That seems okay, you can adjust the title or changelog as you like. > Thank you *so* much, I'm awful at Linux mm. Glad to help. Easy to see why you were taking this approach because the API really does need to be improved and even a subsystem pretty intertwined with mm like KVM shouldn't _really_ be doing this kind of trick (and it should go away when old API is removed). Thanks, Nick
Re: [Intel-gfx] [PATCH 05/47] drm/i915/guc: Add stall timer to non blocking CTB send function
On Thu, Jun 24, 2021 at 07:37:01PM +0200, Michal Wajdeczko wrote: > > > On 24.06.2021 09:04, Matthew Brost wrote: > > Implement a stall timer which fails H2G CTBs once a period of time > > with no forward progress is reached to prevent deadlock. > > > > Also update to ct_write to return -EIO rather than -EPIPE on a > > corrupted descriptor. > > by doing so you will have the same error code for two different problems: > > a) corrupted CTB descriptor (definitely unrecoverable) > b) long stall in CTB processing (still recoverable) > Already discussed both are treated exactly the same by the rest of the stack so we return a single error code. > while caller is explicitly instructed to retry only on: > > c) temporary stall in CTB processing (likely recoverable) > > so why do we want to limit our diagnostics? > > > > > Signed-off-by: John Harrison > > Signed-off-by: Daniele Ceraolo Spurio > > Signed-off-by: Matthew Brost > > --- > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 47 +-- > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 ++ > > 2 files changed, 48 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > index c9a65d05911f..27ec30b5ef47 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > @@ -319,6 +319,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) > > goto err_deregister; > > > > ct->enabled = true; > > + ct->stall_time = KTIME_MAX; > > > > return 0; > > > > @@ -392,7 +393,7 @@ static int ct_write(struct intel_guc_ct *ct, > > unsigned int i; > > > > if (unlikely(ctb->broken)) > > - return -EPIPE; > > + return -EIO; > > > > if (unlikely(desc->status)) > > goto corrupted; > > @@ -464,7 +465,7 @@ static int ct_write(struct intel_guc_ct *ct, > > CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", > > desc->head, desc->tail, desc->status); > > ctb->broken = true; > > - return -EPIPE; > > + return 
-EIO; > > } > > > > /** > > @@ -507,6 +508,18 @@ static int wait_for_ct_request_update(struct > > ct_request *req, u32 *status) > > return err; > > } > > > > +#define GUC_CTB_TIMEOUT_MS 1500 > > it's 150% of core CTB timeout, maybe we should correlate them ? > Seems overkill. > > +static inline bool ct_deadlocked(struct intel_guc_ct *ct) > > +{ > > + long timeout = GUC_CTB_TIMEOUT_MS; > > + bool ret = ktime_ms_delta(ktime_get(), ct->stall_time) > timeout; > > + > > + if (unlikely(ret)) > > + CT_ERROR(ct, "CT deadlocked\n"); > > nit: in commit message you said all these changes are to "prevent > deadlock" so maybe this message should rather be: > > int delta = ktime_ms_delta(ktime_get(), ct->stall_time); > > CT_ERROR(ct, "Communication stalled for %dms\n", delta); > Sure. > (note that CT_ERROR already adds "CT" prefix) > > > + > > + return ret; > > +} > > + > > static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 > > len_dw) > > { > > struct guc_ct_buffer_desc *desc = ctb->desc; > > @@ -518,6 +531,26 @@ static inline bool h2g_has_room(struct > > intel_guc_ct_buffer *ctb, u32 len_dw) > > return space >= len_dw; > > } > > > > +static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw) > > +{ > > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > + > > + lockdep_assert_held(&ct->ctbs.send.lock); > > + > > + if (unlikely(!h2g_has_room(ctb, len_dw))) { > > + if (ct->stall_time == KTIME_MAX) > > + ct->stall_time = ktime_get(); > > + > > + if (unlikely(ct_deadlocked(ct))) > > and maybe above message should be printed somewhere around here when we > detect "deadlock" for the first time? > Not sure I follow. The error message is in the correct place if ask me. Probably should set the broken flag though when the message is printed though. 
> > + return -EIO; > > + else > > + return -EBUSY; > > + } > > + > > + ct->stall_time = KTIME_MAX; > > + return 0; > > +} > > + > > static int ct_send_nb(struct intel_guc_ct *ct, > > const u32 *action, > > u32 len, > > @@ -530,7 +563,7 @@ static int ct_send_nb(struct intel_guc_ct *ct, > > > > spin_lock_irqsave(&ctb->lock, spin_flags); > > > > - ret = h2g_has_room(ctb, len + 1); > > + ret = has_room_nb(ct, len + 1); > > if (unlikely(ret)) > > goto out; > > > > @@ -574,11 +607,19 @@ static int ct_send(struct intel_guc_ct *ct, > > retry: > > spin_lock_irqsave(&ct->ctbs.send.lock, flags); > > if (unlikely(!h2g_has_room(ctb, len + 1))) { > > + if (ct->stall_time == KTIME_MAX) > > + ct->stall_time = ktime_get(); > > as this is a repeated pattern, maybe it should be moved to h2g_has_room > or other
Re: [Intel-gfx] [PATCH 04/47] drm/i915/guc: Add non blocking CTB send function
On Thu, Jun 24, 2021 at 07:02:18PM +0200, Michal Wajdeczko wrote: > > > On 24.06.2021 17:49, Matthew Brost wrote: > > On Thu, Jun 24, 2021 at 04:48:32PM +0200, Michal Wajdeczko wrote: > >> > >> > >> On 24.06.2021 09:04, Matthew Brost wrote: > >>> Add non blocking CTB send function, intel_guc_send_nb. GuC submission > >>> will send CTBs in the critical path and does not need to wait for these > >>> CTBs to complete before moving on, hence the need for this new function. > >>> > >>> The non-blocking CTB now must have a flow control mechanism to ensure > >>> the buffer isn't overrun. A lazy spin wait is used as we believe the > >>> flow control condition should be rare with a properly sized buffer. > >>> > >>> The function, intel_guc_send_nb, is exported in this patch but unused. > >>> Several patches later in the series make use of this function. > >>> > >>> Signed-off-by: John Harrison > >>> Signed-off-by: Matthew Brost > >>> --- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc.h| 12 +++- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 77 +-- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 3 +- > >>> 3 files changed, 82 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> index 4abc59f6f3cd..24b1df6ad4ae 100644 > >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> @@ -74,7 +74,15 @@ static inline struct intel_guc *log_to_guc(struct > >>> intel_guc_log *log) > >>> static > >>> inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 > >>> len) > >>> { > >>> - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); > >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); > >>> +} > >>> + > >>> +#define INTEL_GUC_SEND_NBBIT(31) > >> > >> hmm, this flag really belongs to intel_guc_ct_send() so it should be > >> defined as CTB flag near that function declaration > >> > > > > I can move this up a 
few lines. > > > >>> +static > >>> +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, > >>> u32 len) > >>> +{ > >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, > >>> + INTEL_GUC_SEND_NB); > >>> } > >>> > >>> static inline int > >>> @@ -82,7 +90,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const > >>> u32 *action, u32 len, > >>> u32 *response_buf, u32 response_buf_size) > >>> { > >>> return intel_guc_ct_send(&guc->ct, action, len, > >>> - response_buf, response_buf_size); > >>> + response_buf, response_buf_size, 0); > >>> } > >>> > >>> static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) > >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> index a17215920e58..c9a65d05911f 100644 > >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> @@ -3,6 +3,11 @@ > >>> * Copyright © 2016-2019 Intel Corporation > >>> */ > >>> > >>> +#include > >>> +#include > >>> +#include > >>> +#include > >>> + > >>> #include "i915_drv.h" > >>> #include "intel_guc_ct.h" > >>> #include "gt/intel_gt.h" > >>> @@ -373,7 +378,7 @@ static void write_barrier(struct intel_guc_ct *ct) > >>> static int ct_write(struct intel_guc_ct *ct, > >>> const u32 *action, > >>> u32 len /* in dwords */, > >>> - u32 fence) > >>> + u32 fence, u32 flags) > >>> { > >>> struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > >>> struct guc_ct_buffer_desc *desc = ctb->desc; > >>> @@ -421,9 +426,13 @@ static int ct_write(struct intel_guc_ct *ct, > >>>FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | > >>>FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); > >>> > >>> - hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > >>> - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > >>> - GUC_HXG_REQUEST_MSG_0_DATA0, action[0]); > >>> + hxg = (flags & INTEL_GUC_SEND_NB) ? 
> >>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | > >>> + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | > >>> + GUC_HXG_EVENT_MSG_0_DATA0, action[0])) : > >>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > >>> + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > >>> + GUC_HXG_REQUEST_MSG_0_DATA0, action[0])); > >> > >> or as we already switched to accept and return whole HXG messages in > >> guc_send_mmio() maybe we should do the same for CTB variant too and > >> instead of using extra flag just let caller to prepare proper HXG header > >> with HXG_EVENT type and then in CTB code just look at this type to make > >> decision which code path to use > >> > > > > Not sure I follow. Anyways could this be done in a fo
[Bug 213561] [bisected][regression] GFX10 AMDGPUs can no longer enter idle state after commit. Commit has been pushed to stable branches too.
https://bugzilla.kernel.org/show_bug.cgi?id=213561 Linux_Chemist (untaintablean...@hotmail.co.uk) changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |CODE_FIX --- Comment #10 from Linux_Chemist (untaintablean...@hotmail.co.uk) --- Thank you :) I'll mark this as resolved since the problem is known and code has been reverted ready for the next kernels. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [Intel-gfx] [PATCH 04/47] drm/i915/guc: Add non blocking CTB send function
On Thu, Jun 24, 2021 at 07:02:18PM +0200, Michal Wajdeczko wrote: > > > On 24.06.2021 17:49, Matthew Brost wrote: > > On Thu, Jun 24, 2021 at 04:48:32PM +0200, Michal Wajdeczko wrote: > >> > >> > >> On 24.06.2021 09:04, Matthew Brost wrote: > >>> Add non blocking CTB send function, intel_guc_send_nb. GuC submission > >>> will send CTBs in the critical path and does not need to wait for these > >>> CTBs to complete before moving on, hence the need for this new function. > >>> > >>> The non-blocking CTB now must have a flow control mechanism to ensure > >>> the buffer isn't overrun. A lazy spin wait is used as we believe the > >>> flow control condition should be rare with a properly sized buffer. > >>> > >>> The function, intel_guc_send_nb, is exported in this patch but unused. > >>> Several patches later in the series make use of this function. > >>> > >>> Signed-off-by: John Harrison > >>> Signed-off-by: Matthew Brost > >>> --- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc.h| 12 +++- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 77 +-- > >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 3 +- > >>> 3 files changed, 82 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> index 4abc59f6f3cd..24b1df6ad4ae 100644 > >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > >>> @@ -74,7 +74,15 @@ static inline struct intel_guc *log_to_guc(struct > >>> intel_guc_log *log) > >>> static > >>> inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 > >>> len) > >>> { > >>> - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); > >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); > >>> +} > >>> + > >>> +#define INTEL_GUC_SEND_NBBIT(31) > >> > >> hmm, this flag really belongs to intel_guc_ct_send() so it should be > >> defined as CTB flag near that function declaration > >> > > > > I can move this up a 
few lines. > > > >>> +static > >>> +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, > >>> u32 len) > >>> +{ > >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, > >>> + INTEL_GUC_SEND_NB); > >>> } > >>> > >>> static inline int > >>> @@ -82,7 +90,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const > >>> u32 *action, u32 len, > >>> u32 *response_buf, u32 response_buf_size) > >>> { > >>> return intel_guc_ct_send(&guc->ct, action, len, > >>> - response_buf, response_buf_size); > >>> + response_buf, response_buf_size, 0); > >>> } > >>> > >>> static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) > >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> index a17215920e58..c9a65d05911f 100644 > >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > >>> @@ -3,6 +3,11 @@ > >>> * Copyright © 2016-2019 Intel Corporation > >>> */ > >>> > >>> +#include > >>> +#include > >>> +#include > >>> +#include > >>> + > >>> #include "i915_drv.h" > >>> #include "intel_guc_ct.h" > >>> #include "gt/intel_gt.h" > >>> @@ -373,7 +378,7 @@ static void write_barrier(struct intel_guc_ct *ct) > >>> static int ct_write(struct intel_guc_ct *ct, > >>> const u32 *action, > >>> u32 len /* in dwords */, > >>> - u32 fence) > >>> + u32 fence, u32 flags) > >>> { > >>> struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > >>> struct guc_ct_buffer_desc *desc = ctb->desc; > >>> @@ -421,9 +426,13 @@ static int ct_write(struct intel_guc_ct *ct, > >>>FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | > >>>FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); > >>> > >>> - hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > >>> - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > >>> - GUC_HXG_REQUEST_MSG_0_DATA0, action[0]); > >>> + hxg = (flags & INTEL_GUC_SEND_NB) ? 
> >>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | > >>> + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | > >>> + GUC_HXG_EVENT_MSG_0_DATA0, action[0])) : > >>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > >>> + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > >>> + GUC_HXG_REQUEST_MSG_0_DATA0, action[0])); > >> > >> or as we already switched to accept and return whole HXG messages in > >> guc_send_mmio() maybe we should do the same for CTB variant too and > >> instead of using extra flag just let caller to prepare proper HXG header > >> with HXG_EVENT type and then in CTB code just look at this type to make > >> decision which code path to use > >> > > > > Not sure I follow. Anyways could this be done in a fo
[PATCH] drm/panel: ws2401: Add driver for WideChips WS2401
This adds a driver for panels based on the WideChips WS2401 display controller. This display controller is used in the Samsung LMS380KF01 display found in the Samsung GT-I8160 (Codina) mobile phone and possibly others. As is common with Samsung displays manufacturer commands are necessary to configure the display to a working state. The display optionally supports internal backlight control, but can also use an external backlight. This driver re-uses the DBI infrastructure to communicate with the display. Cc: phone-de...@vger.kernel.org Cc: Douglas Anderson Cc: Noralf Trønnes Signed-off-by: Linus Walleij --- MAINTAINERS | 7 + drivers/gpu/drm/panel/Kconfig | 9 + drivers/gpu/drm/panel/Makefile| 1 + .../gpu/drm/panel/panel-widechips-ws2401.c| 404 ++ 4 files changed, 421 insertions(+) create mode 100644 drivers/gpu/drm/panel/panel-widechips-ws2401.c diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f..8bfa89f61220 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5946,6 +5946,13 @@ S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/vboxvideo/ +DRM DRIVER FOR WIDECHIPS WS2401 PANELS +M: Linus Walleij +S: Maintained +T: git git://anongit.freedesktop.org/drm/drm-misc +F: Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml +F: drivers/gpu/drm/panel/panel-widechips-ws2401.c + DRM DRIVER FOR VMWARE VIRTUAL GPU M: "VMware Graphics" M: Roland Scheidegger diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 4894913936e9..f4fe1dba9912 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -552,6 +552,15 @@ config DRM_PANEL_VISIONOX_RM69299 Say Y here if you want to enable support for Visionox RM69299 DSI Video Mode panel. 
+config DRM_PANEL_WIDECHIPS_WS2401 + tristate "Widechips WS2401 DPI panel driver" + depends on OF && SPI && GPIOLIB + depends on BACKLIGHT_CLASS_DEVICE + select DRM_MIPI_DBI + help + Say Y here if you want to enable support for the Widechips + WS2401 DPI 480x800 display controller. + config DRM_PANEL_XINPENG_XPP055C272 tristate "Xinpeng XPP055C272 panel driver" depends on OF diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index cae4d976c069..d94c27df17aa 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -58,4 +58,5 @@ obj-$(CONFIG_DRM_PANEL_TPO_TD043MTEA1) += panel-tpo-td043mtea1.o obj-$(CONFIG_DRM_PANEL_TPO_TPG110) += panel-tpo-tpg110.o obj-$(CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA) += panel-truly-nt35597.o obj-$(CONFIG_DRM_PANEL_VISIONOX_RM69299) += panel-visionox-rm69299.o +obj-$(CONFIG_DRM_PANEL_WIDECHIPS_WS2401) += panel-widechips-ws2401.o obj-$(CONFIG_DRM_PANEL_XINPENG_XPP055C272) += panel-xinpeng-xpp055c272.o diff --git a/drivers/gpu/drm/panel/panel-widechips-ws2401.c b/drivers/gpu/drm/panel/panel-widechips-ws2401.c new file mode 100644 index ..d15870301174 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-widechips-ws2401.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Panel driver for the WideChips WS2401 480x800 DPI RGB panel, used in + * the Samsung Mobile Display (SMD) LMS380KF01. + * Found in the Samsung Galaxy Ace 2 GT-I8160 mobile phone. + * Linus Walleij + * Inspired by code and know-how in the vendor driver by Gareth Phillips. 
+ */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define WS2401_RESCTL 0xb8 /* Resolution select control */ +#define WS2401_PSMPS 0xbd /* SMPS positive control */ +#define WS2401_NSMPS 0xbe /* SMPS negative control */ +#define WS2401_SMPS0xbf +#define WS2401_BCMODE 0xc1 /* Backlight control mode */ +#define WS2401_WRBLCTL 0xc3 /* Backlight control */ +#define WS2401_WRDISBV 0xc4 /* Write manual brightness */ +#define WS2401_WRCTRLD 0xc6 /* Write BL control */ +#define WS2401_WRMIE 0xc7 /* Write MIE mode */ +#define WS2401_READ_ID10xda /* Read panel ID 1 */ +#define WS2401_READ_ID20xdb /* Read panel ID 2 */ +#define WS2401_READ_ID30xdc /* Read panel ID 3 */ +#define WS2401_PASSWD1 0xf0 /* Password command for level 2 */ +#define WS2401_DISCTL 0xf2 /* Display control */ +#define WS2401_PWRCTL 0xf3 /* Power control */ +#define WS2401_VCOMCTL 0xf4 /* VCOM control */ +#define WS2401_SRCCTL 0xf5 /* Source control */ +#define WS2401_PANELCTL0xf6 /* Panel control */ + +static const u8 ws2
[PATCH] drm/panel: Add DT bindings for Samsung LMS380KF01
This adds device tree bindings for the Samsung Mobile Displays LMS380KF01 RGB DPI display panel. Cc: devicet...@vger.kernel.org Cc: phone-de...@vger.kernel.org Cc: Douglas Anderson Cc: Noralf Trønnes Signed-off-by: Linus Walleij --- .../display/panel/samsung,lms380kf01.yaml | 96 +++ 1 file changed, 96 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml diff --git a/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml b/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml new file mode 100644 index ..138be12fc509 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/samsung,lms380kf01.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung LMS380KF01 display panel + +description: The LMS380KF01 is a 480x800 DPI display panel from Samsung Mobile + Displays (SMD) utilizing the WideChips WS2401 display controller. It can be + used with internal or external backlight control. + +maintainers: + - Linus Walleij + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: +const: samsung,lms380kf01 + + reg: true + + interrupts: +description: provides an optional ESD (electrostatic discharge) + interrupt that signals abnormalities in the display hardware. + This can also be raised for other reasons like erroneous + configuration. +maxItems: 1 + + reset-gpios: true + + vci-supply: +description: regulator that supplies the VCI analog voltage + usually around 3.0 V + + vccio-supply: +description: regulator that supplies the VCCIO voltage usually + around 1.8 V + + backlight: true + + spi-cpha: +$ref: /schemas/types.yaml#/definitions/flag +description: inherited as a SPI client node. Must be set. 
+ + spi-cpol: +$ref: /schemas/types.yaml#/definitions/flag +description: inherited as a SPI client node. Must be set. + + spi-max-frequency: +$ref: /schemas/types.yaml#/definitions/uint32 +description: inherited as a SPI client node. +maximum: 120 + + port: true + +required: + - compatible + - reg + - spi-cpha + - spi-cpol + +additionalProperties: false + +examples: + - | +#include +#include + +spi { + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { +compatible = "samsung,lms380kf01"; +spi-max-frequency = <120>; +spi-cpha; +spi-cpol; +reg = <0>; +vci-supply = <&lcd_3v0_reg>; +vccio-supply = <&lcd_1v8_reg>; +reset-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>; +interrupt-parent = <&gpio2>; +interrupts = <29 IRQ_TYPE_EDGE_RISING>; + +port { + panel_in: endpoint { +remote-endpoint = <&display_out>; + }; +}; + }; +}; + +... -- 2.31.1
[Bug 213561] [bisected][regression] GFX10 AMDGPUs can no longer enter idle state after commit. Commit has been pushed to stable branches too.
https://bugzilla.kernel.org/show_bug.cgi?id=213561 Alan Swanson (rei...@improbability.net) changed: What|Removed |Added CC||rei...@improbability.net --- Comment #9 from Alan Swanson (rei...@improbability.net) --- These patches have just been reverted for 5.13-rc8 and should hopefully be backported to stable. https://lists.freedesktop.org/archives/amd-gfx/2021-June/065575.html https://lists.freedesktop.org/archives/dri-devel/2021-June/312755.html -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH v4 2/3] dt-bindings: msm: dsi: document phy-type property for 7nm dsi phy
On Thu, Jun 17, 2021 at 10:43:34AM -0400, Jonathan Marek wrote: > Document a new phy-type property which will be used to determine whether > the phy should operate in D-PHY or C-PHY mode. > > Signed-off-by: Jonathan Marek > Reviewed-by: Laurent Pinchart > --- > .../devicetree/bindings/display/msm/dsi-phy-7nm.yaml | 5 + > include/dt-bindings/phy/phy.h| 2 ++ > 2 files changed, 7 insertions(+) > > diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml > b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml > index c0077ca7e9e7..70809d1cac54 100644 > --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml > +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml > @@ -34,6 +34,11 @@ properties: > description: | >Connected to VDD_A_DSI_PLL_0P9 pin (or VDDA_DSI{0,1}_PLL_0P9 for > sm8150) > > + phy-type: > +description: D-PHY (default) or C-PHY mode > +enum: [ 10, 11 ] > +default: 10 > + > required: >- compatible >- reg > diff --git a/include/dt-bindings/phy/phy.h b/include/dt-bindings/phy/phy.h > index 887a31b250a8..f48c9acf251e 100644 > --- a/include/dt-bindings/phy/phy.h > +++ b/include/dt-bindings/phy/phy.h > @@ -20,5 +20,7 @@ > #define PHY_TYPE_XPCS7 > #define PHY_TYPE_SGMII 8 > #define PHY_TYPE_QSGMII 9 > +#define PHY_TYPE_DPHY10 > +#define PHY_TYPE_CPHY11 I thought I recalled a suggestion to add 'MIPI_' in this. Or was there another similar patch? If not, I'm fine either way: Acked-by: Rob Herring
Re: [PATCH] drm/sched: Split drm_sched_job_init
On Thu, Jun 24, 2021 at 11:00 PM Emma Anholt wrote: > > On Thu, Jun 24, 2021 at 1:45 PM Daniel Vetter wrote: > > > > This is a very confusingly named function, because not just does it > > init an object, it arms it and provides a point of no return for > > pushing a job into the scheduler. It would be nice if that's a bit > > clearer in the interface. > > > > But the real reason is that I want to push the dependency tracking > > helpers into the scheduler code, and that means drm_sched_job_init > > must be called a lot earlier, without arming the job. > > > > v2: > > - don't change .gitignore (Steven) > > - don't forget v3d (Emma) > > > > Acked-by: Steven Price > > Signed-off-by: Daniel Vetter > > Cc: Lucas Stach > > Cc: Russell King > > Cc: Christian Gmeiner > > Cc: Qiang Yu > > Cc: Rob Herring > > Cc: Tomeu Vizoso > > Cc: Steven Price > > Cc: Alyssa Rosenzweig > > Cc: David Airlie > > Cc: Daniel Vetter > > Cc: Sumit Semwal > > Cc: "Christian König" > > Cc: Masahiro Yamada > > Cc: Kees Cook > > Cc: Adam Borowski > > Cc: Nick Terrell > > Cc: Mauro Carvalho Chehab > > Cc: Paul Menzel > > Cc: Sami Tolvanen > > Cc: Viresh Kumar > > Cc: Alex Deucher > > Cc: Dave Airlie > > Cc: Nirmoy Das > > Cc: Deepak R Varma > > Cc: Lee Jones > > Cc: Kevin Wang > > Cc: Chen Li > > Cc: Luben Tuikov > > Cc: "Marek Olšák" > > Cc: Dennis Li > > Cc: Maarten Lankhorst > > Cc: Andrey Grodzovsky > > Cc: Sonny Jiang > > Cc: Boris Brezillon > > Cc: Tian Tao > > Cc: Jack Zhang > > Cc: etna...@lists.freedesktop.org > > Cc: l...@lists.freedesktop.org > > Cc: linux-me...@vger.kernel.org > > Cc: linaro-mm-...@lists.linaro.org > > Cc: Emma Anholt > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ > > drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ > > drivers/gpu/drm/lima/lima_sched.c| 2 ++ > > drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ > > drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- > > drivers/gpu/drm/scheduler/sched_fence.c | 
15 ++- > > drivers/gpu/drm/scheduler/sched_main.c | 23 ++- > > drivers/gpu/drm/v3d/v3d_gem.c| 2 ++ > > include/drm/gpu_scheduler.h | 6 +- > > 10 files changed, 52 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > index c5386d13eb4a..a4ec092af9a7 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser > > *p, > > if (r) > > goto error_unlock; > > > > + drm_sched_job_arm(&job->base); > > + > > /* No memory allocation is allowed while holding the notifier lock. > > * The lock is held until amdgpu_cs_submit is finished and fence is > > * added to BOs. > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > index d33e6d97cc89..5ddb955d2315 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct > > drm_sched_entity *entity, > > if (r) > > return r; > > > > + drm_sched_job_arm(&job->base); > > + > > *f = dma_fence_get(&job->base.s_fence->finished); > > amdgpu_job_free_resources(job); > > drm_sched_entity_push_job(&job->base, entity); > > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > index 19826e504efc..af1671f01c7f 100644 > > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity > > *sched_entity, > > if (ret) > > goto out_unlock; > > > > + drm_sched_job_arm(&submit->sched_job); > > + > > submit->out_fence = > > dma_fence_get(&submit->sched_job.s_fence->finished); > > submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, > > submit->out_fence, 0, > > diff --git a/drivers/gpu/drm/lima/lima_sched.c > > 
b/drivers/gpu/drm/lima/lima_sched.c > > index ecf3267334ff..bd1af1fd8c0f 100644 > > --- a/drivers/gpu/drm/lima/lima_sched.c > > +++ b/drivers/gpu/drm/lima/lima_sched.c > > @@ -129,6 +129,8 @@ int lima_sched_task_init(struct lima_sched_task *task, > > return err; > > } > > > > + drm_sched_job_arm(&task->base); > > + > > task->num_bos = num_bos; > > task->vm = lima_vm_get(vm); > > > > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c > > b/drivers/gpu/drm/panfrost/panfrost_job.c > > index beb62c8fc851..1e950534b9b0 100644 > > --- a/drivers/gpu/drm/panfrost/panfrost_job.c > > +++ b/drivers/gpu/drm/p
[Bug 212107] Temperature increase by 15°C on radeon gpu
https://bugzilla.kernel.org/show_bug.cgi?id=212107 miloog (mileikas...@mailbox.org) changed: What|Removed |Added CC||mileikas...@mailbox.org --- Comment #11 from miloog (mileikas...@mailbox.org) --- I can confirm. But in a different scenario. I'm using debian bullseye with lts kernel and latest amdgpu firmware. I don't change any fan control mechanism. 5.10.44 and 5.10.45 works fine but 5.10.46 if i'm only start sway (wayland window manager) my gpu usage is at 100% without doing anything. It's a vega 56. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH] drm/sched: Split drm_sched_job_init
On Thu, Jun 24, 2021 at 1:45 PM Daniel Vetter wrote: > > This is a very confusingly named function, because not just does it > init an object, it arms it and provides a point of no return for > pushing a job into the scheduler. It would be nice if that's a bit > clearer in the interface. > > But the real reason is that I want to push the dependency tracking > helpers into the scheduler code, and that means drm_sched_job_init > must be called a lot earlier, without arming the job. > > v2: > - don't change .gitignore (Steven) > - don't forget v3d (Emma) > > Acked-by: Steven Price > Signed-off-by: Daniel Vetter > Cc: Lucas Stach > Cc: Russell King > Cc: Christian Gmeiner > Cc: Qiang Yu > Cc: Rob Herring > Cc: Tomeu Vizoso > Cc: Steven Price > Cc: Alyssa Rosenzweig > Cc: David Airlie > Cc: Daniel Vetter > Cc: Sumit Semwal > Cc: "Christian König" > Cc: Masahiro Yamada > Cc: Kees Cook > Cc: Adam Borowski > Cc: Nick Terrell > Cc: Mauro Carvalho Chehab > Cc: Paul Menzel > Cc: Sami Tolvanen > Cc: Viresh Kumar > Cc: Alex Deucher > Cc: Dave Airlie > Cc: Nirmoy Das > Cc: Deepak R Varma > Cc: Lee Jones > Cc: Kevin Wang > Cc: Chen Li > Cc: Luben Tuikov > Cc: "Marek Olšák" > Cc: Dennis Li > Cc: Maarten Lankhorst > Cc: Andrey Grodzovsky > Cc: Sonny Jiang > Cc: Boris Brezillon > Cc: Tian Tao > Cc: Jack Zhang > Cc: etna...@lists.freedesktop.org > Cc: l...@lists.freedesktop.org > Cc: linux-me...@vger.kernel.org > Cc: linaro-mm-...@lists.linaro.org > Cc: Emma Anholt > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ > drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ > drivers/gpu/drm/lima/lima_sched.c| 2 ++ > drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ > drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- > drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- > drivers/gpu/drm/scheduler/sched_main.c | 23 ++- > drivers/gpu/drm/v3d/v3d_gem.c| 2 ++ > include/drm/gpu_scheduler.h | 6 +- > 10 files changed, 52 insertions(+), 10 deletions(-) > 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index c5386d13eb4a..a4ec092af9a7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > if (r) > goto error_unlock; > > + drm_sched_job_arm(&job->base); > + > /* No memory allocation is allowed while holding the notifier lock. > * The lock is held until amdgpu_cs_submit is finished and fence is > * added to BOs. > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > index d33e6d97cc89..5ddb955d2315 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct > drm_sched_entity *entity, > if (r) > return r; > > + drm_sched_job_arm(&job->base); > + > *f = dma_fence_get(&job->base.s_fence->finished); > amdgpu_job_free_resources(job); > drm_sched_entity_push_job(&job->base, entity); > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > index 19826e504efc..af1671f01c7f 100644 > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity > *sched_entity, > if (ret) > goto out_unlock; > > + drm_sched_job_arm(&submit->sched_job); > + > submit->out_fence = > dma_fence_get(&submit->sched_job.s_fence->finished); > submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, > submit->out_fence, 0, > diff --git a/drivers/gpu/drm/lima/lima_sched.c > b/drivers/gpu/drm/lima/lima_sched.c > index ecf3267334ff..bd1af1fd8c0f 100644 > --- a/drivers/gpu/drm/lima/lima_sched.c > +++ b/drivers/gpu/drm/lima/lima_sched.c > @@ -129,6 +129,8 @@ int lima_sched_task_init(struct lima_sched_task *task, > return err; > } > > + drm_sched_job_arm(&task->base); > 
+ > task->num_bos = num_bos; > task->vm = lima_vm_get(vm); > > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c > b/drivers/gpu/drm/panfrost/panfrost_job.c > index beb62c8fc851..1e950534b9b0 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_job.c > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c > @@ -244,6 +244,8 @@ int panfrost_job_push(struct panfrost_job *job) > goto unlock; > } > > + drm_sched_job_arm(&job->base); > + > job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); > > ret = panfrost_acquire_object_fences(job->bos, job->bo_
[Bug 213569] Amdgpu temperature reaching dangerous levels
https://bugzilla.kernel.org/show_bug.cgi?id=213569 miloog (mileikas...@mailbox.org) changed: What|Removed |Added CC||mileikas...@mailbox.org --- Comment #1 from miloog (mileikas...@mailbox.org) --- I can confirm. But in a different scenario. I'm using debian bullseye with lts kernel and latest amdgpu firmware. I don't change any fan control mechanism. 5.10.44 and 5.10.45 works fine but 5.10.46 if i'm only start sway (wayland window manager) my gpu usage is at 100% without doing anything. It's a vega 56. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH v2 2/2] drm/panel: Add support for E Ink VB3300-KCA
On Tue, Jun 15, 2021 at 08:33:12PM +1000, Alistair Francis wrote: > Add support for the 10.3" E Ink panel described at: > https://www.eink.com/product.html?type=productdetail&id=7 > > Signed-off-by: Alistair Francis > --- > v2: > - Fix build warning > - Document new string > > .../bindings/display/panel/panel-simple.yaml | 2 ++ > drivers/gpu/drm/panel/panel-simple.c | 29 +++ > 2 files changed, 31 insertions(+) > > diff --git > a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml > b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml > index b3797ba2698b..799e20222551 100644 > --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml > +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml > @@ -128,6 +128,8 @@ properties: > # Emerging Display Technology Corp. WVGA TFT Display with capacitive > touch >- edt,etm0700g0dh6 >- edt,etm0700g0edh6 > +# E Ink VB3300-KCA > + - eink,vb3300-kca Combining this with patch 1 would be preferable. Either way, Acked-by: Rob Herring
Re: [PATCH] drm/v3d: Move drm_sched_job_init to v3d_job_init
On Thu, Jun 24, 2021 at 1:45 PM Daniel Vetter wrote: > > Prep work for using the scheduler dependency handling. We need to call > drm_sched_job_init earlier so we can use the new drm_sched_job_await* > functions for dependency handling here. > > v2: Slightly better commit message and rebase to include the > drm_sched_job_arm() call (Emma). > > Signed-off-by: Daniel Vetter > Cc: Emma Anholt OK, makes sense now. r-b.
[PATCH] drm/sched: Split drm_sched_job_init
This is a very confusingly named function, because not just does it init an object, it arms it and provides a point of no return for pushing a job into the scheduler. It would be nice if that's a bit clearer in the interface. But the real reason is that I want to push the dependency tracking helpers into the scheduler code, and that means drm_sched_job_init must be called a lot earlier, without arming the job. v2: - don't change .gitignore (Steven) - don't forget v3d (Emma) Acked-by: Steven Price Signed-off-by: Daniel Vetter Cc: Lucas Stach Cc: Russell King Cc: Christian Gmeiner Cc: Qiang Yu Cc: Rob Herring Cc: Tomeu Vizoso Cc: Steven Price Cc: Alyssa Rosenzweig Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Masahiro Yamada Cc: Kees Cook Cc: Adam Borowski Cc: Nick Terrell Cc: Mauro Carvalho Chehab Cc: Paul Menzel Cc: Sami Tolvanen Cc: Viresh Kumar Cc: Alex Deucher Cc: Dave Airlie Cc: Nirmoy Das Cc: Deepak R Varma Cc: Lee Jones Cc: Kevin Wang Cc: Chen Li Cc: Luben Tuikov Cc: "Marek Olšák" Cc: Dennis Li Cc: Maarten Lankhorst Cc: Andrey Grodzovsky Cc: Sonny Jiang Cc: Boris Brezillon Cc: Tian Tao Cc: Jack Zhang Cc: etna...@lists.freedesktop.org Cc: l...@lists.freedesktop.org Cc: linux-me...@vger.kernel.org Cc: linaro-mm-...@lists.linaro.org Cc: Emma Anholt --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ drivers/gpu/drm/lima/lima_sched.c| 2 ++ drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- drivers/gpu/drm/scheduler/sched_main.c | 23 ++- drivers/gpu/drm/v3d/v3d_gem.c| 2 ++ include/drm/gpu_scheduler.h | 6 +- 10 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c5386d13eb4a..a4ec092af9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; + drm_sched_job_arm(&job->base); + /* No memory allocation is allowed while holding the notifier lock. * The lock is held until amdgpu_cs_submit is finished and fence is * added to BOs. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index d33e6d97cc89..5ddb955d2315 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; + drm_sched_job_arm(&job->base); + *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); drm_sched_entity_push_job(&job->base, entity); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 19826e504efc..af1671f01c7f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity, if (ret) goto out_unlock; + drm_sched_job_arm(&submit->sched_job); + submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, submit->out_fence, 0, diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index ecf3267334ff..bd1af1fd8c0f 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -129,6 +129,8 @@ int lima_sched_task_init(struct lima_sched_task *task, return err; } + drm_sched_job_arm(&task->base); + task->num_bos = num_bos; task->vm = lima_vm_get(vm); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index beb62c8fc851..1e950534b9b0 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -244,6 
+244,8 @@ int panfrost_job_push(struct panfrost_job *job) goto unlock; } + drm_sched_job_arm(&job->base); + job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); ret = panfrost_acquire_object_fences(job->bos, job->bo_count, diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 79554aa4dbb1..f7347c284886 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -485,9 +485,9 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity
[PATCH] drm/v3d: Move drm_sched_job_init to v3d_job_init
Prep work for using the scheduler dependency handling. We need to call drm_sched_job_init earlier so we can use the new drm_sched_job_await* functions for dependency handling here. v2: Slightly better commit message and rebase to include the drm_sched_job_arm() call (Emma). Signed-off-by: Daniel Vetter Cc: Emma Anholt --- drivers/gpu/drm/v3d/v3d_gem.c | 64 +++ 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 69ac20e11b09..d1028ccf6dd5 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -433,9 +433,10 @@ v3d_wait_bo_ioctl(struct drm_device *dev, void *data, static int v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, struct v3d_job *job, void (*free)(struct kref *ref), -u32 in_sync) +u32 in_sync, enum v3d_queue queue) { struct dma_fence *in_fence = NULL; + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; int ret; job->v3d = v3d; @@ -446,6 +447,10 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, return ret; xa_init_flags(&job->deps, XA_FLAGS_ALLOC); + ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], +v3d_priv); + if (ret) + goto fail; ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, &in_fence); if (ret == -EINVAL) @@ -464,17 +469,9 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, return ret; } -static int -v3d_push_job(struct v3d_file_priv *v3d_priv, -struct v3d_job *job, enum v3d_queue queue) +static void +v3d_push_job(struct v3d_job *job) { - int ret; - - ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], -v3d_priv); - if (ret) - return ret; - drm_sched_job_arm(&job->base); job->done_fence = dma_fence_get(&job->base.s_fence->finished); @@ -483,8 +480,6 @@ v3d_push_job(struct v3d_file_priv *v3d_priv, kref_get(&job->refcount); drm_sched_entity_push_job(&job->base); - - return 0; } static void @@ -530,7 +525,6 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void 
*data, struct drm_file *file_priv) { struct v3d_dev *v3d = to_v3d_dev(dev); - struct v3d_file_priv *v3d_priv = file_priv->driver_priv; struct drm_v3d_submit_cl *args = data; struct v3d_bin_job *bin = NULL; struct v3d_render_job *render; @@ -556,7 +550,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, INIT_LIST_HEAD(&render->unref_list); ret = v3d_job_init(v3d, file_priv, &render->base, - v3d_render_job_free, args->in_sync_rcl); + v3d_render_job_free, args->in_sync_rcl, V3D_RENDER); if (ret) { kfree(render); return ret; @@ -570,7 +564,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, } ret = v3d_job_init(v3d, file_priv, &bin->base, - v3d_job_free, args->in_sync_bcl); + v3d_job_free, args->in_sync_bcl, V3D_BIN); if (ret) { v3d_job_put(&render->base); kfree(bin); @@ -592,7 +586,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, goto fail; } - ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0); + ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0, V3D_CACHE_CLEAN); if (ret) { kfree(clean_job); clean_job = NULL; @@ -615,9 +609,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, mutex_lock(&v3d->sched_lock); if (bin) { - ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN); - if (ret) - goto fail_unreserve; + v3d_push_job(&bin->base); ret = drm_gem_fence_array_add(&render->base.deps, dma_fence_get(bin->base.done_fence)); @@ -625,9 +617,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, goto fail_unreserve; } - ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER); - if (ret) - goto fail_unreserve; + v3d_push_job(&render->base); if (clean_job) { struct dma_fence *render_fence = @@ -635,9 +625,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = drm_gem_fence_array_add(&clean_job->deps, render_fence); if (ret) goto fail_unreserve; - ret = v3d_push_job(v3d_priv, clean_j
Re: [git pull] drm fixes for 5.13-rc8/final
The pull request you sent on Fri, 25 Jun 2021 06:17:22 +1000: > git://anongit.freedesktop.org/drm/drm tags/drm-fixes-2021-06-25 has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/44db63d1ad8d71c6932cbe007eb41f31c434d140 Thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/prtracker.html
[git pull] drm fixes for 5.13-rc8/final
Hi Linus, This is a bit bigger than I'd like at this stage, and I guess last week was extra quiet, but it's mostly one fix across 3 drivers to wait for buffer move pinning to complete. There was one locking change that got reverted so it's just noise. Otherwise the amdgpu/nouveau changes are for known regressions, and otherwise it's just misc changes in kmb/atmel/vc4 drivers. I'm off for a few days (have email for some of it), but I'll be back mid next week for hopefully the next PR. Dave. drm-fixes-2021-06-25: drm fixes for 5.13-rc8/final radeon/nouveau/amdgpu/ttm: - same fix in 3 drivers to wait for BO to be pinned after moving it. core: - auth locking change + brown paper bag revert amdgpu: - Revert GFX9, 10 doorbell fixes, we just end up trading one bug for another - Potential memory corruption fix in framebuffer handling nouveau: - fix regression checking dma addresses kmb: - error return fix atmel-hlcdc: - fix kernel warnings at boot - enable async flips vc4: - fix CPU hang due to power management The following changes since commit 13311e74253fe64329390df80bed3f07314ddd61: Linux 5.13-rc7 (2021-06-20 15:03:15 -0700) are available in the Git repository at: git://anongit.freedesktop.org/drm/drm tags/drm-fixes-2021-06-25 for you to fetch changes up to 5e0e7a407675d9f50f1d840214beaec42293b79b: Merge tag 'drm-misc-fixes-2021-06-24' of git://anongit.freedesktop.org/drm/drm-misc into drm-fixes (2021-06-25 06:05:13 +1000) drm fixes for 5.13-rc8/final radeon/nouveau/amdgpu/ttm: - same fix in 3 drivers to wait for BO to be pinned after moving it. 
core: - auth locking change + brown paper bag revert amdgpu: - Revert GFX9, 10 doorbell fixes, we just end up trading one bug for another - Potential memory corruption fix in framebuffer handling nouveau: - fix regression checking dma addresses kmb: - error return fix atmel-hlcdc: - fix kernel warnings at boot - enable async flips vc4: - fix CPU hang due to power management Christian König (4): drm/nouveau: wait for moving fence after pinning v2 drm/radeon: wait for moving fence after pinning drm/amdgpu: wait for moving fence after pinning drm/nouveau: fix dma_address check for CPU/GPU sync Dan Sneddon (2): drm: atmel_hlcdc: Enable the crtc vblank prior to crtc usage. drm/atmel-hlcdc: Allow async page flips Daniel Vetter (1): Revert "drm: add a locked version of drm_is_current_master" Dave Airlie (2): Merge tag 'amd-drm-fixes-5.13-2021-06-21' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes Merge tag 'drm-misc-fixes-2021-06-24' of git://anongit.freedesktop.org/drm/drm-misc into drm-fixes Desmond Cheong Zhi Xi (1): drm: add a locked version of drm_is_current_master Krzysztof Kozlowski (1): drm/panel: ld9040: reference spi_device_id table Maxime Ripard (2): drm/vc4: hdmi: Move the HSM clock enable to runtime_pm drm/vc4: hdmi: Make sure the controller is powered in detect Michel Dänzer (1): drm/amdgpu: Call drm_framebuffer_init last for framebuffer init Yifan Zhang (2): Revert "drm/amdgpu/gfx10: enlarge CP_MEC_DOORBELL_RANGE_UPPER to cover full doorbell." Revert "drm/amdgpu/gfx9: fix the doorbell missing when in CGPG issue." 
Zhen Lei (1): drm/kmb: Fix error return code in kmb_hw_init() drivers/gpu/drm/amd/amdgpu/amdgpu_display.c| 12 --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c| 14 +++- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +--- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 17 ++ drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c | 1 + drivers/gpu/drm/kmb/kmb_drv.c | 1 + drivers/gpu/drm/nouveau/nouveau_bo.c | 4 +-- drivers/gpu/drm/nouveau/nouveau_prime.c| 17 +- drivers/gpu/drm/panel/panel-samsung-ld9040.c | 1 + drivers/gpu/drm/radeon/radeon_prime.c | 16 -- drivers/gpu/drm/vc4/vc4_hdmi.c | 44 -- 12 files changed, 100 insertions(+), 39 deletions(-)
Re: [PATCH v3 2/2] backlight: lm3630a: convert to atomic PWM API and check for errors
Hi Lee, On Tue, Jun 22, 2021 at 02:12:57PM +0100, Lee Jones wrote: > On Mon, 21 Jun 2021, Uwe Kleine-König wrote: > > > The practical upside here is that this only needs a single API call to > > program the hardware which (depending on the underlying hardware) can > > be more effective and prevents glitches. > > > > Up to now the return value of the pwm functions was ignored. Fix this > > and propagate the error to the caller. > > > > Signed-off-by: Uwe Kleine-König > > --- > > drivers/video/backlight/lm3630a_bl.c | 42 +--- > > 1 file changed, 19 insertions(+), 23 deletions(-) > > Fixed the subject line and applied, thanks. It's not obvious to me what needed fixing here, and I don't find where you pushed the patches, neither in next nor in https://git.kernel.org/pub/scm/linux/kernel/git/lee/backlight.git; so I cannot check what you actually changed. I assume you did s/lm3630a/lm3630a_bl/ ? I didn't because it felt tautological. Best regards Uwe -- Pengutronix e.K. | Uwe Kleine-König| Industrial Linux Solutions | https://www.pengutronix.de/ | signature.asc Description: PGP signature
Re: [PATCH] dma-buf/sync_file: Don't leak fences on merge failure
I don't have drm-misc access. Mind pushing? On Thu, Jun 24, 2021 at 12:59 PM Christian König wrote: > > Am 24.06.21 um 19:47 schrieb Jason Ekstrand: > > Each add_fence() call does a dma_fence_get() on the relevant fence. In > > the error path, we weren't calling dma_fence_put() so all those fences > > got leaked. Also, in the krealloc_array failure case, we weren't > > freeing the fences array. Instead, ensure that i and fences are always > > zero-initialized and dma_fence_put() all the fences and kfree(fences) on > > every error path. > > > > Signed-off-by: Jason Ekstrand > > Fixes: a02b9dc90d84 ("dma-buf/sync_file: refactor fence storage in struct > > sync_file") > > Cc: Gustavo Padovan > > Cc: Christian König > > Reviewed-by: Christian König > > > --- > > drivers/dma-buf/sync_file.c | 13 +++-- > > 1 file changed, 7 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c > > index 20d9bddbb985b..394e6e1e96860 100644 > > --- a/drivers/dma-buf/sync_file.c > > +++ b/drivers/dma-buf/sync_file.c > > @@ -211,8 +211,8 @@ static struct sync_file *sync_file_merge(const char > > *name, struct sync_file *a, > >struct sync_file *b) > > { > > struct sync_file *sync_file; > > - struct dma_fence **fences, **nfences, **a_fences, **b_fences; > > - int i, i_a, i_b, num_fences, a_num_fences, b_num_fences; > > + struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences; > > + int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences; > > > > sync_file = sync_file_alloc(); > > if (!sync_file) > > @@ -236,7 +236,7 @@ static struct sync_file *sync_file_merge(const char > > *name, struct sync_file *a, > >* If a sync_file can only be created with sync_file_merge > >* and sync_file_create, this is a reasonable assumption. 
> >*/ > > - for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { > > + for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { > > struct dma_fence *pt_a = a_fences[i_a]; > > struct dma_fence *pt_b = b_fences[i_b]; > > > > @@ -277,15 +277,16 @@ static struct sync_file *sync_file_merge(const char > > *name, struct sync_file *a, > > fences = nfences; > > } > > > > - if (sync_file_set_fence(sync_file, fences, i) < 0) { > > - kfree(fences); > > + if (sync_file_set_fence(sync_file, fences, i) < 0) > > goto err; > > - } > > > > strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); > > return sync_file; > > > > err: > > + while (i) > > + dma_fence_put(fences[--i]); > > + kfree(fences); > > fput(sync_file->file); > > return NULL; > > >
[PATCH 2/2] drm/ttm, drm/i915: Update ttm_move_memcpy for async use
The buffer object argument to ttm_move_memcpy was only used to determine whether the destination memory should be cleared only or whether we should copy data. Replace it with a "clear" bool, and update the callers. The intention here is to be able to use ttm_move_memcpy() async under a dma-fence as a fallback if an accelerated blit fails in a security- critical path where data might leak if the blit is not properly performed. For that purpose the bo is an unsuitable argument since its relevant members might already have changed at call time. Finally, update the ttm_move_memcpy kerneldoc that seems to have ended up with a stale version. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 20 ++-- include/drm/ttm/ttm_bo_driver.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4e529adcdfc7..f19847abe856 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -517,7 +517,7 @@ static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, obj->ttm.cached_io_st, src_reg->region.start); - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); } } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2f57f824e6db..e3747f069674 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -75,22 +75,21 @@ void ttm_mem_io_free(struct ttm_device *bdev, /** * ttm_move_memcpy - Helper to perform a memcpy ttm move operation. - * @bo: The struct ttm_buffer_object. - * @new_mem: The struct ttm_resource we're moving to (copy destination). - * @new_iter: A struct ttm_kmap_iter representing the destination resource. + * @clear: Whether to clear rather than copy. + * @num_pages: Number of pages of the operation. 
+ * @dst_iter: A struct ttm_kmap_iter representing the destination resource. * @src_iter: A struct ttm_kmap_iter representing the source resource. * * This function is intended to be able to move out async under a * dma-fence if desired. */ -void ttm_move_memcpy(struct ttm_buffer_object *bo, +void ttm_move_memcpy(bool clear, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter) { const struct ttm_kmap_iter_ops *dst_ops = dst_iter->ops; const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; - struct ttm_tt *ttm = bo->ttm; struct dma_buf_map src_map, dst_map; pgoff_t i; @@ -99,10 +98,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, return; /* Don't move nonexistent data. Clear destination instead. */ - if (src_ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm))) { - if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return; - + if (clear) { for (i = 0; i < num_pages; ++i) { dst_ops->map_local(dst_iter, &dst_map, i); if (dst_map.is_iomem) @@ -146,6 +142,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_kmap_iter_linear_io io; } _dst_iter, _src_iter; struct ttm_kmap_iter *dst_iter, *src_iter; + bool clear; int ret = 0; if (ttm && ((ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) || @@ -169,7 +166,10 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, goto out_src_iter; } - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + clear = src_iter->ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))) + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); + src_copy = *src_mem; ttm_bo_move_sync_cleanup(bo, dst_mem); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 68d6069572aa..5f087575194b 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -322,7 +322,7 @@ int ttm_bo_tt_bind(struct ttm_buffer_object *bo, struct ttm_resource *mem); */ void 
ttm_bo_tt_destroy(struct ttm_buffer_object *bo); -void ttm_move_memcpy(struct ttm_buffer_object *bo, +void ttm_move_memcpy(bool clear, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter); -- 2.31.1
[PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat
In order to make the code a bit more readable and to facilitate async memcpy moves, reorganize the move code a little. Determine at an early stage whether to copy or to clear. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++--- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index c39d982c4fa6..4e529adcdfc7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, } static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + bool clear, struct ttm_resource *dst_mem, struct sg_table *dst_st) { @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return -EINVAL; dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); - if (!ttm || !ttm_tt_is_populated(ttm)) { + if (clear) { if (bo->type == ttm_bo_type_kernel) return -EINVAL; - if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return 0; - intel_engine_pm_get(i915->gt.migrate.context->engine); ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, dst_st->sgl, dst_level, @@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return ret; } -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, -struct ttm_operation_ctx *ctx, -struct ttm_resource *dst_mem, -struct ttm_place *hop) +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct ttm_resource_manager *dst_man = - ttm_manager_type(bo->bdev, dst_mem->mem_type); struct intel_memory_region *dst_reg, *src_reg; union { struct ttm_kmap_iter_tt tt; struct ttm_kmap_iter_iomap io; } _dst_iter, _src_iter; struct ttm_kmap_iter *dst_iter, *src_iter; - struct sg_table *dst_st; int ret; dst_reg = 
i915_ttm_region(bo->bdev, dst_mem->mem_type); src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); GEM_BUG_ON(!dst_reg || !src_reg); + ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st); + if (ret) { + dst_iter = !cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, +dst_st, dst_reg->region.start); + + src_iter = !cpu_maps_iomem(bo->resource) ? + ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, +obj->ttm.cached_io_st, +src_reg->region.start); + + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + } +} + +static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, +struct ttm_operation_ctx *ctx, +struct ttm_resource *dst_mem, +struct ttm_place *hop) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); + struct ttm_tt *ttm = bo->ttm; + struct sg_table *dst_st; + bool clear; + int ret; + /* Sync for now. We could do the actual copy async. */ ret = ttm_bo_wait_ctx(bo, ctx); if (ret) @@ -526,9 +550,8 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, } /* Populate ttm with pages if needed. Typically system memory. */ - if (bo->ttm && (dst_man->use_tt || - (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { - ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); + if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, ttm, ctx); if (ret) return ret; } @@ -537,23 +560,10 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, if (IS_ERR(dst_st)) return PTR_ERR(dst_st); - ret = i915_ttm_accel_move(bo, dst_mem, dst_st); - if (ret) { -
[PATCH 0/2] drm/i915, drm/ttm: Update the ttm_move_memcpy() interface
The ttm_move_memcpy() function was intended to be able to be used async under a fence. We are going to utilize that as a fallback if the gpu clearing blit fails before we set up CPU- or GPU ptes to the memory region. But to accomplish that the bo argument to ttm_move_memcpy() needs to be replaced. Patch 1 reorganizes the i915 ttm move code a bit to make the change in patch 2 smaller. Patch 2 updates the ttm_move_memcpy() interface. Thomas Hellström (2): drm/i915/ttm: Reorganize the ttm move code somewhat drm/ttm, drm/i915: Update ttm_move_memcpy for async use drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++--- drivers/gpu/drm/ttm/ttm_bo_util.c | 20 +++ include/drm/ttm/ttm_bo_driver.h | 2 +- 3 files changed, 51 insertions(+), 41 deletions(-) -- 2.31.1
Re: [PATCH v2 0/2] R-Car DU: Add DSI encoder driver for V3U
Hi Laurent, On Wed, Jun 23, 2021 at 04:56:37PM +0300, Laurent Pinchart wrote: > Hello, > > This patch series adds a driver for the DSI encoder found in the R-Car > V3U SoC, which is the first SoC in the family that supports DSI. > > The driver is based on an implementation from the BSP written by Luu > Hoai, with lots of further rework. If anyone is interested in the > history, a broken down v1 is available from [1]. > > The driver has been successfully tested on the Falcon board, which > connects an SN65DSI86 to the output of the DSI encoder. Further patches > are needed for the ti-sn65dsi86 driver to enable usage in plain DP > (non-eDP) mode, as the Falcon board has a mini-DP connector wired to the > output of the SN65DSI86. This is work in progress (see [2]). > > [1] > https://lore.kernel.org/linux-renesas-soc/189c7a07-29cd-39f4-1dec-56aa94c11...@ideasonboard.com/T/#mafbe130386cbc8326f85cad46860813516ef4a80 > [2] > https://lore.kernel.org/linux-renesas-soc/20210322030128.2283-1-laurent.pinchart+rene...@ideasonboard.com/ > > LUU HOAI (1): > drm: rcar-du: Add R-Car DSI driver > > Laurent Pinchart (1): > dt-bindings: display: bridge: Add binding for R-Car MIPI DSI/CSI-2 TX I have browsed both patches. Both looks good: Acked-by: Sam Ravnborg Sam
Re: [PATCH v14 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing
On Thu, Jun 24, 2021 at 11:58:57PM +0800, Claire Chang wrote: > On Thu, Jun 24, 2021 at 11:56 PM Konrad Rzeszutek Wilk > wrote: > > > > On Thu, Jun 24, 2021 at 10:10:51AM -0400, Qian Cai wrote: > > > > > > > > > On 6/24/2021 7:48 AM, Will Deacon wrote: > > > > Ok, diff below which attempts to tackle the offset issue I mentioned as > > > > well. Qian Cai -- please can you try with these changes? > > > > > > This works fine. > > > > Cool. Let me squash this patch in #6 and rebase the rest of them. > > > > Claire, could you check the devel/for-linus-5.14 say by end of today to > > double check that I didn't mess anything up please? > > I just submitted v15 here > (https://lore.kernel.org/patchwork/cover/1451322/) in case it's > helpful. Oh! Nice! > I'll double check of course. Thanks for the efforts! I ended up using your patch #6 and #7. Please double-check.
Re: [PATCH v15 00/12] Restricted DMA
On Thu, Jun 24, 2021 at 11:55:14PM +0800, Claire Chang wrote: > This series implements mitigations for lack of DMA access control on > systems without an IOMMU, which could result in the DMA accessing the > system memory at unexpected times and/or unexpected addresses, possibly > leading to data leakage or corruption. > > For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is > not behind an IOMMU. As PCI-e, by design, gives the device full access to > system memory, a vulnerability in the Wi-Fi firmware could easily escalate > to a full system exploit (remote wifi exploits: [1a], [1b] that shows a > full chain of exploits; [2], [3]). > > To mitigate the security concerns, we introduce restricted DMA. Restricted > DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a > specially allocated region and does memory allocation from the same region. > The feature on its own provides a basic level of protection against the DMA > overwriting buffer contents at unexpected times. However, to protect > against general data leakage and system memory corruption, the system needs > to provide a way to restrict the DMA to a predefined memory region (this is > usually done at firmware level, e.g. MPU in ATF on some ARM platforms [4]). > > [1a] > https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html > [1b] > https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html > [2] https://blade.tencent.com/en/advisories/qualpwn/ > [3] > https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/ > [4] > https://github.com/ARM-software/arm-trusted-firmware/blob/master/plat/mediatek/mt8183/drivers/emi_mpu/emi_mpu.c#L132 > > v15: > - Apply Will's diff (https://lore.kernel.org/patchwork/patch/1448957/#1647521) > to fix the crash reported by Qian. 
> - Add Stefano's Acked-by tag for patch 01/12 from v14 That all should now be on https://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git/ devel/for-linus-5.14 (and linux-next)
[PULL] drm-misc-fixes
Hi Dave, Daniel, Here's this week drm-misc-fixes PR Thanks! Maxime drm-misc-fixes-2021-06-24: A DMA address check for nouveau, an error code return fix for kmb, fixes to wait for a moving fence after pinning the BO for amdgpu, nouveau and radeon, a crtc and async page flip fix for atmel-hlcdc and a cpu hang fix for vc4. The following changes since commit c336a5ee984708db4826ef9e47d184e638e29717: drm: Lock pointer access in drm_master_release() (2021-06-10 12:22:02 +0200) are available in the Git repository at: git://anongit.freedesktop.org/drm/drm-misc tags/drm-misc-fixes-2021-06-24 for you to fetch changes up to d330099115597bbc238d6758a4930e72b49ea9ba: drm/nouveau: fix dma_address check for CPU/GPU sync (2021-06-24 15:40:44 +0200) A DMA address check for nouveau, an error code return fix for kmb, fixes to wait for a moving fence after pinning the BO for amdgpu, nouveau and radeon, a crtc and async page flip fix for atmel-hlcdc and a cpu hang fix for vc4. Christian König (4): drm/nouveau: wait for moving fence after pinning v2 drm/radeon: wait for moving fence after pinning drm/amdgpu: wait for moving fence after pinning drm/nouveau: fix dma_address check for CPU/GPU sync Dan Sneddon (2): drm: atmel_hlcdc: Enable the crtc vblank prior to crtc usage. 
drm/atmel-hlcdc: Allow async page flips Daniel Vetter (1): Revert "drm: add a locked version of drm_is_current_master" Desmond Cheong Zhi Xi (1): drm: add a locked version of drm_is_current_master Krzysztof Kozlowski (1): drm/panel: ld9040: reference spi_device_id table Maxime Ripard (2): drm/vc4: hdmi: Move the HSM clock enable to runtime_pm drm/vc4: hdmi: Make sure the controller is powered in detect Zhen Lei (1): drm/kmb: Fix error return code in kmb_hw_init() drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c| 14 +++- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 17 ++ drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c | 1 + drivers/gpu/drm/kmb/kmb_drv.c | 1 + drivers/gpu/drm/nouveau/nouveau_bo.c | 4 +-- drivers/gpu/drm/nouveau/nouveau_prime.c| 17 +- drivers/gpu/drm/panel/panel-samsung-ld9040.c | 1 + drivers/gpu/drm/radeon/radeon_prime.c | 14 ++-- drivers/gpu/drm/vc4/vc4_hdmi.c | 44 -- 9 files changed, 90 insertions(+), 23 deletions(-) signature.asc Description: PGP signature
Re: [PATCH 2/6] KVM: mmu: also return page from gfn_to_pfn
Excerpts from Paolo Bonzini's message of June 24, 2021 7:42 pm: > On 24/06/21 10:52, Nicholas Piggin wrote: >>> For now, wrap all calls to gfn_to_pfn functions in the new helper >>> function. Callers which don't need the page struct will be updated in >>> follow-up patches. >> Hmm. You mean callers that do need the page will be updated? Normally >> if there will be leftover users that don't need the struct page then >> you would go the other way and keep the old call the same, and add a new >> one (gfn_to_pfn_page) just for those that need it. > > Needing kvm_pfn_page_unwrap is a sign that something might be buggy, so > it's a good idea to move the short name to the common case and the ugly > kvm_pfn_page_unwrap(gfn_to_pfn(...)) for the weird one. In fact I'm not > sure there should be any kvm_pfn_page_unwrap in the end. If all callers were updated that is one thing, but from the changelog it sounds like that would not happen and there would be some gfn_to_pfn users left over. But yes in the end you would either need to make gfn_to_pfn never return a page found via follow_pte, or change all callers to the new way. If the plan is for the latter then I guess that's fine. Thanks, Nick
Re: [PATCH 2/6] KVM: mmu: also return page from gfn_to_pfn
Excerpts from Paolo Bonzini's message of June 24, 2021 8:21 pm: > On 24/06/21 12:17, Nicholas Piggin wrote: >>> If all callers were updated that is one thing, but from the changelog >>> it sounds like that would not happen and there would be some gfn_to_pfn >>> users left over. >>> >>> But yes in the end you would either need to make gfn_to_pfn never return >>> a page found via follow_pte, or change all callers to the new way. If >>> the plan is for the latter then I guess that's fine. >> >> Actually in that case anyway I don't see the need -- the existence of >> gfn_to_pfn is enough to know it might be buggy. It can just as easily >> be grepped for as kvm_pfn_page_unwrap. > > Sure, but that would leave us with longer function names > (gfn_to_pfn_page* instead of gfn_to_pfn*). So the "safe" use is the one > that looks worse and the unsafe use is the one that looks safe. The churn isn't justified because of function name length. Choose g2pp() if you want a non-descriptive but short name. The existing name isn't good anyway because it not only looks up a pfn but also a page, and more importantly it gets a ref on the page. The name should be changed if you introduce a new API. >> And are gfn_to_page cases also >> vulnerable to the same issue? > > No, they're just broken for the VM_IO|VM_PFNMAP case. No they aren't vulnerable, or they are vulnerable but also broken in other cases? Thanks, Nick
Re: [PATCH 06/11] drm/v3d: Move drm_sched_job_init to v3d_job_init
On Thu, Jun 24, 2021 at 7:00 AM Daniel Vetter wrote: > > Prep work for using the scheduler dependency handling. > > Signed-off-by: Daniel Vetter > Cc: Emma Anholt Back when I wrote this, I think there were rules that there had to be no failure paths between a job_init and a push. Has that changed? I really don't have the context to evaluate this, I'm not sure what new "scheduler dependency handling" is given that there was already something that I considered to be dependency handling!
[PATCH 08/12] media: hantro: Add H.264 support for Rockchip VDPU2
From: Jonas Karlman Rockchip VDPU2 core is present on RK3328, RK3326/PX30, RK3399 and others. It's similar to Hantro G1, but it's not compatible with it. Signed-off-by: Jonas Karlman Signed-off-by: Ezequiel Garcia --- drivers/staging/media/hantro/Makefile | 1 + drivers/staging/media/hantro/hantro_hw.h | 1 + .../media/hantro/rockchip_vpu2_hw_h264_dec.c | 491 ++ 3 files changed, 493 insertions(+) create mode 100644 drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c diff --git a/drivers/staging/media/hantro/Makefile b/drivers/staging/media/hantro/Makefile index 287370188d2a..90036831fec4 100644 --- a/drivers/staging/media/hantro/Makefile +++ b/drivers/staging/media/hantro/Makefile @@ -13,6 +13,7 @@ hantro-vpu-y += \ hantro_g2_hevc_dec.o \ hantro_g1_vp8_dec.o \ rockchip_vpu2_hw_jpeg_enc.o \ + rockchip_vpu2_hw_h264_dec.o \ rockchip_vpu2_hw_mpeg2_dec.o \ rockchip_vpu2_hw_vp8_dec.o \ hantro_jpeg.o \ diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index 7a8048afe357..9296624654a6 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -241,6 +241,7 @@ dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx); int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx); +int rockchip_vpu2_h264_dec_run(struct hantro_ctx *ctx); int hantro_g1_h264_dec_run(struct hantro_ctx *ctx); int hantro_h264_dec_init(struct hantro_ctx *ctx); void hantro_h264_dec_exit(struct hantro_ctx *ctx); diff --git a/drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c b/drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c new file mode 100644 index ..64a6330475eb --- /dev/null +++ b/drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (c) 2014 Rockchip Electronics Co., Ltd. 
+ * Hertz Wong + * Herman Chen + * + * Copyright (C) 2014 Google, Inc. + * Tomasz Figa + */ + +#include +#include + +#include + +#include "hantro_hw.h" +#include "hantro_v4l2.h" + +#define VDPU_SWREG(nr) ((nr) * 4) + +#define VDPU_REG_DEC_OUT_BASE VDPU_SWREG(63) +#define VDPU_REG_RLC_VLC_BASE VDPU_SWREG(64) +#define VDPU_REG_QTABLE_BASE VDPU_SWREG(61) +#define VDPU_REG_DIR_MV_BASE VDPU_SWREG(62) +#define VDPU_REG_REFER_BASE(i) (VDPU_SWREG(84 + (i))) +#define VDPU_REG_DEC_E(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_DEC_ADV_PRE_DIS(v)((v) ? BIT(11) : 0) +#define VDPU_REG_DEC_SCMD_DIS(v) ((v) ? BIT(10) : 0) +#define VDPU_REG_FILTERING_DIS(v) ((v) ? BIT(8) : 0) +#define VDPU_REG_PIC_FIXED_QUANT(v)((v) ? BIT(7) : 0) +#define VDPU_REG_DEC_LATENCY(v)(((v) << 1) & GENMASK(6, 1)) + +#define VDPU_REG_INIT_QP(v)(((v) << 25) & GENMASK(30, 25)) +#define VDPU_REG_STREAM_LEN(v) (((v) << 0) & GENMASK(23, 0)) + +#define VDPU_REG_APF_THRESHOLD(v) (((v) << 17) & GENMASK(30, 17)) +#define VDPU_REG_STARTMB_X(v) (((v) << 8) & GENMASK(16, 8)) +#define VDPU_REG_STARTMB_Y(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_DEC_MODE(v) (((v) << 0) & GENMASK(3, 0)) + +#define VDPU_REG_DEC_STRENDIAN_E(v)((v) ? BIT(5) : 0) +#define VDPU_REG_DEC_STRSWAP32_E(v)((v) ? BIT(4) : 0) +#define VDPU_REG_DEC_OUTSWAP32_E(v)((v) ? BIT(3) : 0) +#define VDPU_REG_DEC_INSWAP32_E(v) ((v) ? BIT(2) : 0) +#define VDPU_REG_DEC_OUT_ENDIAN(v) ((v) ? BIT(1) : 0) +#define VDPU_REG_DEC_IN_ENDIAN(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_DEC_DATA_DISC_E(v)((v) ? BIT(22) : 0) +#define VDPU_REG_DEC_MAX_BURST(v) (((v) << 16) & GENMASK(20, 16)) +#define VDPU_REG_DEC_AXI_WR_ID(v) (((v) << 8) & GENMASK(15, 8)) +#define VDPU_REG_DEC_AXI_RD_ID(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_START_CODE_E(v) ((v) ? BIT(22) : 0) +#define VDPU_REG_CH_8PIX_ILEAV_E(v)((v) ? BIT(21) : 0) +#define VDPU_REG_RLC_MODE_E(v) ((v) ? BIT(20) : 0) +#define VDPU_REG_PIC_INTERLACE_E(v)((v) ? 
BIT(17) : 0) +#define VDPU_REG_PIC_FIELDMODE_E(v)((v) ? BIT(16) : 0) +#define VDPU_REG_PIC_TOPFIELD_E(v) ((v) ? BIT(13) : 0) +#define VDPU_REG_WRITE_MVS_E(v)((v) ? BIT(10) : 0) +#define VDPU_REG_SEQ_MBAFF_E(v)((v) ? BIT(7) : 0) +#define VDPU_REG_PICORD_COUNT_E(v) ((v) ? BIT(6) : 0) +#define VDPU_REG_DEC_TIMEOUT_E(v) ((v) ? BIT(5) : 0) +#define VDPU_REG_DEC_CLK_GATE_E(v) ((v) ? BIT(4) : 0) + +#define VDPU_REG_PRED_BC_TAP_0_0(v)(((v) << 22) & GENMASK(31, 22)) +#define VDPU_REG_PRED_BC_TAP_0_1(v)(((v) << 12) & GENMASK(21, 12)) +#define VDPU_REG_PRED_BC_TAP_0_2(v)(((v) << 2)
[PATCH 04/12] hantro: Make struct hantro_variant.init() optional
The hantro_variant.init() function is there for platforms to perform hardware-specific initialization, such as clock rate bumping. Not all platforms require it, so make it optional. Signed-off-by: Ezequiel Garcia --- drivers/staging/media/hantro/hantro.h | 4 ++-- drivers/staging/media/hantro/hantro_drv.c | 10 ++ drivers/staging/media/hantro/sama5d4_vdec_hw.c | 6 -- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/staging/media/hantro/hantro.h b/drivers/staging/media/hantro/hantro.h index a70c386de6f1..c2e2dca38628 100644 --- a/drivers/staging/media/hantro/hantro.h +++ b/drivers/staging/media/hantro/hantro.h @@ -61,8 +61,8 @@ struct hantro_irq { * @num_postproc_fmts: Number of post-processor formats. * @codec: Supported codecs * @codec_ops: Codec ops. - * @init: Initialize hardware. - * @runtime_resume:reenable hardware after power gating + * @init: Initialize hardware, optional. + * @runtime_resume:reenable hardware after power gating, optional. * @irqs: array of irq names and interrupt handlers * @num_irqs: number of irqs in the array * @clk_names: array of clock names diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c index 31d8449ca1d2..9b5415176bfe 100644 --- a/drivers/staging/media/hantro/hantro_drv.c +++ b/drivers/staging/media/hantro/hantro_drv.c @@ -942,10 +942,12 @@ static int hantro_probe(struct platform_device *pdev) } } - ret = vpu->variant->init(vpu); - if (ret) { - dev_err(&pdev->dev, "Failed to init VPU hardware\n"); - return ret; + if (vpu->variant->init) { + ret = vpu->variant->init(vpu); + if (ret) { + dev_err(&pdev->dev, "Failed to init VPU hardware\n"); + return ret; + } } pm_runtime_set_autosuspend_delay(vpu->dev, 100); diff --git a/drivers/staging/media/hantro/sama5d4_vdec_hw.c b/drivers/staging/media/hantro/sama5d4_vdec_hw.c index 58ae72c2b723..9c3b8cd0b239 100644 --- a/drivers/staging/media/hantro/sama5d4_vdec_hw.c +++ b/drivers/staging/media/hantro/sama5d4_vdec_hw.c @@ 
-64,11 +64,6 @@ static const struct hantro_fmt sama5d4_vdec_fmts[] = { }, }; -static int sama5d4_hw_init(struct hantro_dev *vpu) -{ - return 0; -} - /* * Supported codec ops. */ @@ -109,7 +104,6 @@ const struct hantro_variant sama5d4_vdec_variant = { .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | HANTRO_H264_DECODER, .codec_ops = sama5d4_vdec_codec_ops, - .init = sama5d4_hw_init, .irqs = sama5d4_irqs, .num_irqs = ARRAY_SIZE(sama5d4_irqs), .clk_names = sama5d4_clk_names, -- 2.30.0
Re: [PATCH 0/6] KVM: Remove uses of struct page from x86 and arm64 MMU
Excerpts from Paolo Bonzini's message of June 24, 2021 10:41 pm: > On 24/06/21 13:42, Nicholas Piggin wrote: >> Excerpts from Nicholas Piggin's message of June 24, 2021 8:34 pm: >>> Excerpts from David Stevens's message of June 24, 2021 1:57 pm: KVM supports mapping VM_IO and VM_PFNMAP memory into the guest by using follow_pte in gfn_to_pfn. However, the resolved pfns may not have associated struct pages, so they should not be passed to pfn_to_page. This series removes such calls from the x86 and arm64 secondary MMU. To do this, this series modifies gfn_to_pfn to return a struct page in addition to a pfn, if the hva was resolved by gup. This allows the caller to call put_page only when necessitated by gup. This series provides a helper function that unwraps the new return type of gfn_to_pfn to provide behavior identical to the old behavior. As I have no hardware to test powerpc/mips changes, the function is used there for minimally invasive changes. Additionally, as gfn_to_page and gfn_to_pfn_cache are not integrated with mmu notifier, they cannot be easily changed over to only use pfns. This addresses CVE-2021-22543 on x86 and arm64. >>> >>> Does this fix the problem? (untested I don't have a POC setup at hand, >>> but at least in concept) >> >> This one actually compiles at least. Unfortunately I don't have much >> time in the near future to test, and I only just found out about this >> CVE a few hours ago. > And it also works (the reproducer gets an infinite stream of userspace > exits and especially does not crash). We can still go for David's > solution later since MMU notifiers are able to deal with this pages, but > it's a very nice patch for stable kernels. Oh nice, thanks for testing. How's this? Thanks, Nick --- KVM: Fix page ref underflow for regions with valid but non-refcounted pages It's possible to create a region which maps valid but non-refcounted pages (e.g., tail pages of non-compound higher order allocations). 
These host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family of APIs, which take a reference to the page, which takes it from 0 to 1. When the reference is dropped, this will free the page incorrectly. Fix this by only taking a reference on the page if it was non-zero, which indicates it is participating in normal refcounting (and can be released with put_page). Signed-off-by: Nicholas Piggin --- virt/kvm/kvm_main.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a6bc7af0e28..46fb042837d2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2055,6 +2055,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) return true; } +static int kvm_try_get_pfn(kvm_pfn_t pfn) +{ + if (kvm_is_reserved_pfn(pfn)) + return 1; + return get_page_unless_zero(pfn_to_page(pfn)); +} + static int hva_to_pfn_remapped(struct vm_area_struct *vma, unsigned long addr, bool *async, bool write_fault, bool *writable, @@ -2104,13 +2111,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * Whoever called remap_pfn_range is also going to call e.g. * unmap_mapping_range before the underlying pages are freed, * causing a call to our MMU notifier. +* +* Certain IO or PFNMAP mappings can be backed with valid +* struct pages, but be allocated without refcounting e.g., +* tail pages of non-compound higher order allocations, which +* would then underflow the refcount when the caller does the +* required put_page. Don't allow those pages here. */ - kvm_get_pfn(pfn); + if (!kvm_try_get_pfn(pfn)) + r = -EFAULT; out: pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; - return 0; + + return r; } /* -- 2.23.0
[PATCH 09/12] media: hantro: Enable H.264 on Rockchip VDPU2
Given H.264 support for VDPU2 was just added, let's enable it. For now, this is only enabled on platform that don't have an RKVDEC core, such as RK3328. Signed-off-by: Ezequiel Garcia --- .../staging/media/hantro/rockchip_vpu_hw.c| 26 ++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c index 3ccc16413f42..e4e3b5e7689b 100644 --- a/drivers/staging/media/hantro/rockchip_vpu_hw.c +++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c @@ -162,6 +162,19 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = { .fourcc = V4L2_PIX_FMT_NV12, .codec_mode = HANTRO_MODE_NONE, }, + { + .fourcc = V4L2_PIX_FMT_H264_SLICE, + .codec_mode = HANTRO_MODE_H264_DEC, + .max_depth = 2, + .frmsize = { + .min_width = 48, + .max_width = 1920, + .step_width = MB_DIM, + .min_height = 48, + .max_height = 1088, + .step_height = MB_DIM, + }, + }, { .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, .codec_mode = HANTRO_MODE_MPEG2_DEC, @@ -388,6 +401,12 @@ static const struct hantro_codec_ops rk3399_vpu_codec_ops[] = { .init = hantro_jpeg_enc_init, .exit = hantro_jpeg_enc_exit, }, + [HANTRO_MODE_H264_DEC] = { + .run = rockchip_vpu2_h264_dec_run, + .reset = rockchip_vpu2_dec_reset, + .init = hantro_h264_dec_init, + .exit = hantro_h264_dec_exit, + }, [HANTRO_MODE_MPEG2_DEC] = { .run = rockchip_vpu2_mpeg2_dec_run, .reset = rockchip_vpu2_dec_reset, @@ -433,6 +452,8 @@ static const char * const rockchip_vpu_clk_names[] = { "aclk", "hclk" }; +/* VDPU1/VEPU1 */ + const struct hantro_variant rk3036_vpu_variant = { .dec_offset = 0x400, .dec_fmts = rk3066_vpu_dec_fmts, @@ -495,11 +516,14 @@ const struct hantro_variant rk3288_vpu_variant = { .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) }; +/* VDPU2/VEPU2 */ + const struct hantro_variant rk3328_vpu_variant = { .dec_offset = 0x400, .dec_fmts = rk3399_vpu_dec_fmts, .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts), - .codec = HANTRO_MPEG2_DECODER | 
HANTRO_VP8_DECODER, + .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | +HANTRO_H264_DECODER, .codec_ops = rk3399_vpu_codec_ops, .irqs = rockchip_vdpu2_irqs, .num_irqs = ARRAY_SIZE(rockchip_vdpu2_irqs), -- 2.30.0
Re: [PATCH 3/6] KVM: x86/mmu: avoid struct page in MMU
Excerpts from David Stevens's message of June 24, 2021 1:57 pm: > From: David Stevens > out_unlock: > if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) > read_unlock(&vcpu->kvm->mmu_lock); > else > write_unlock(&vcpu->kvm->mmu_lock); > - kvm_release_pfn_clean(pfn); > + if (pfnpg.page) > + put_page(pfnpg.page); > return r; > } How about kvm_release_pfn_page_clean(pfnpg); Thanks, Nick
[PATCH 07/12] media: hantro: h264: Move reference picture number to a helper
Add a hantro_h264_get_ref_nbr() helper function to get the reference picture numbers. This will be used by the Rockchip VDPU2 H.264 driver. Signed-off-by: Ezequiel Garcia --- drivers/staging/media/hantro/hantro_g1_h264_dec.c | 14 ++ drivers/staging/media/hantro/hantro_h264.c| 11 +++ drivers/staging/media/hantro/hantro_hw.h | 2 ++ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c index 6faacfc44c7c..236ce24ca00c 100644 --- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c @@ -126,7 +126,6 @@ static void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) static void set_ref(struct hantro_ctx *ctx) { - struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; const u8 *b0_reflist, *b1_reflist, *p_reflist; struct hantro_dev *vpu = ctx->dev; int reg_num; @@ -143,17 +142,8 @@ static void set_ref(struct hantro_ctx *ctx) * subsequential reference pictures. 
*/ for (i = 0; i < HANTRO_H264_DPB_SIZE; i += 2) { - reg = 0; - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - reg |= G1_REG_REF_PIC_REFER0_NBR(dpb[i].pic_num); - else - reg |= G1_REG_REF_PIC_REFER0_NBR(dpb[i].frame_num); - - if (dpb[i + 1].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - reg |= G1_REG_REF_PIC_REFER1_NBR(dpb[i + 1].pic_num); - else - reg |= G1_REG_REF_PIC_REFER1_NBR(dpb[i + 1].frame_num); - + reg = G1_REG_REF_PIC_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, i)) | + G1_REG_REF_PIC_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, i + 1)); vdpu_write_relaxed(vpu, reg, G1_REG_REF_PIC(i / 2)); } diff --git a/drivers/staging/media/hantro/hantro_h264.c b/drivers/staging/media/hantro/hantro_h264.c index 6d72136760e7..0b4d2491be3b 100644 --- a/drivers/staging/media/hantro/hantro_h264.c +++ b/drivers/staging/media/hantro/hantro_h264.c @@ -348,6 +348,17 @@ dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, return dma_addr; } +u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx) +{ + const struct v4l2_h264_dpb_entry *dpb = &ctx->h264_dec.dpb[dpb_idx]; + + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + return 0; + if (dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + return dpb->pic_num; + return dpb->frame_num; +} + int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx) { struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec; diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index ce678fedaad6..7a8048afe357 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -238,6 +238,8 @@ void hantro_jpeg_enc_done(struct hantro_ctx *ctx); dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, unsigned int dpb_idx); +u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, + unsigned int dpb_idx); int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx); int hantro_g1_h264_dec_run(struct hantro_ctx *ctx); int hantro_h264_dec_init(struct 
hantro_ctx *ctx); -- 2.30.0
[PATCH 10/12] dt-bindings: media: rockchip-vpu: Add PX30 compatible
From: Paul Kocialkowski The Rockchip PX30 SoC has a Hantro VPU that features a decoder (VDPU2) and an encoder (VEPU2). Signed-off-by: Paul Kocialkowski Signed-off-by: Ezequiel Garcia --- Documentation/devicetree/bindings/media/rockchip-vpu.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml index b88172a59de7..3b9c5aa91fcc 100644 --- a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml +++ b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml @@ -28,6 +28,9 @@ properties: - items: - const: rockchip,rk3228-vpu - const: rockchip,rk3399-vpu + - items: + - const: rockchip,px30-vpu + - const: rockchip,rk3399-vpu reg: maxItems: 1 -- 2.30.0
Re: [PATCH 2/6] KVM: mmu: also return page from gfn_to_pfn
Excerpts from Nicholas Piggin's message of June 24, 2021 7:57 pm: > Excerpts from Paolo Bonzini's message of June 24, 2021 7:42 pm: >> On 24/06/21 10:52, Nicholas Piggin wrote: For now, wrap all calls to gfn_to_pfn functions in the new helper function. Callers which don't need the page struct will be updated in follow-up patches. >>> Hmm. You mean callers that do need the page will be updated? Normally >>> if there will be leftover users that don't need the struct page then >>> you would go the other way and keep the old call the same, and add a new >>> one (gfn_to_pfn_page) just for those that need it. >> >> Needing kvm_pfn_page_unwrap is a sign that something might be buggy, so >> it's a good idea to move the short name to the common case and the ugly >> kvm_pfn_page_unwrap(gfn_to_pfn(...)) for the weird one. In fact I'm not >> sure there should be any kvm_pfn_page_unwrap in the end. > > If all callers were updated that is one thing, but from the changelog > it sounds like that would not happen and there would be some gfn_to_pfn > users left over. > > But yes in the end you would either need to make gfn_to_pfn never return > a page found via follow_pte, or change all callers to the new way. If > the plan is for the latter then I guess that's fine. Actually in that case anyway I don't see the need -- the existence of gfn_to_pfn is enough to know it might be buggy. It can just as easily be grepped for as kvm_pfn_page_unwrap. And are gfn_to_page cases also vulnerable to the same issue? So I think it could be marked deprecated or something if not everything will be converted in the one series, and don't need to touch all that arch code with this patch. Thanks, Nick
[PATCH 11/12] arm64: dts: rockchip: Add VPU support for the PX30
From: Paul Kocialkowski The PX30 has a VPU (both decoder and encoder) with a dedicated IOMMU. Describe these two entities in device-tree. Signed-off-by: Paul Kocialkowski Signed-off-by: Ezequiel Garcia --- arch/arm64/boot/dts/rockchip/px30.dtsi | 23 +++ 1 file changed, 23 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index 09baa8a167ce..892eb074775b 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -1016,6 +1016,29 @@ gpu: gpu@ff40 { status = "disabled"; }; + vpu: video-codec@ff442000 { + compatible = "rockchip,px30-vpu", "rockchip,rk3399-vpu"; + reg = <0x0 0xff442000 0x0 0x800>; + interrupts = , +; + interrupt-names = "vepu", "vdpu"; + clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; + clock-names = "aclk", "hclk"; + iommus = <&vpu_mmu>; + power-domains = <&power PX30_PD_VPU>; + }; + + vpu_mmu: iommu@ff442800 { + compatible = "rockchip,iommu"; + reg = <0x0 0xff442800 0x0 0x100>; + interrupts = ; + interrupt-names = "vpu_mmu"; + clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; + clock-names = "aclk", "iface"; + #iommu-cells = <0>; + power-domains = <&power PX30_PD_VPU>; + }; + dsi: dsi@ff45 { compatible = "rockchip,px30-mipi-dsi"; reg = <0x0 0xff45 0x0 0x1>; -- 2.30.0
[PATCH 02/12] arm64: dts: rockchip: Add panel orientation to Odroid Go Advance
The Odroid Go Advance panel is rotated, so let's reflect this in the device tree. Signed-off-by: Ezequiel Garcia --- arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts index 49c97f76df77..cca19660e60a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts @@ -240,6 +240,7 @@ panel@0 { iovcc-supply = <&vcc_lcd>; reset-gpios = <&gpio3 RK_PC0 GPIO_ACTIVE_LOW>; vdd-supply = <&vcc_lcd>; + rotation = <270>; port { mipi_in_panel: endpoint { -- 2.30.0
Re: [PATCH 1/6] KVM: x86/mmu: release audited pfns
Excerpts from David Stevens's message of June 24, 2021 1:57 pm: > From: David Stevens Changelog? This looks like a bug, should it have a Fixes: tag? Thanks, Nick > > Signed-off-by: David Stevens > --- > arch/x86/kvm/mmu/mmu_audit.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/arch/x86/kvm/mmu/mmu_audit.c b/arch/x86/kvm/mmu/mmu_audit.c > index cedc17b2f60e..97ff184084b4 100644 > --- a/arch/x86/kvm/mmu/mmu_audit.c > +++ b/arch/x86/kvm/mmu/mmu_audit.c > @@ -121,6 +121,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 > *sptep, int level) > audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx " >"ent %llxn", vcpu->arch.mmu->root_level, pfn, >hpa, *sptep); > + > + kvm_release_pfn_clean(pfn); > } > > static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) > -- > 2.32.0.93.g670b81a890-goog > >
Re: [PATCH 02/12] arm64: dts: rockchip: Add panel orientation to Odroid Go Advance
On Thu, 2021-06-24 at 20:37 +0200, Heiko Stübner wrote: > Am Donnerstag, 24. Juni 2021, 20:26:02 CEST schrieb Ezequiel Garcia: > > The Odroid Go Advance panel is rotated, so let's reflect this > > in the device tree. > > > > Signed-off-by: Ezequiel Garcia > > similar patch already applied for 5.14: > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts?id=edb39de5d731f147c7b08c4a5eb246ae1dbdd947 Great minds think alike! :) Thanks, Ezequiel
[PATCH 05/12] media: hantro: Avoid redundant hantro_get_{dst, src}_buf() calls
Getting the next src/dst buffer is relatively expensive so avoid doing it multiple times. Signed-off-by: Ezequiel Garcia --- .../staging/media/hantro/hantro_g1_h264_dec.c | 17 - .../staging/media/hantro/hantro_g1_vp8_dec.c | 18 +- .../media/hantro/rockchip_vpu2_hw_vp8_dec.c | 19 +-- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c index 5c792b7bcb79..2aa37baad0c3 100644 --- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c @@ -19,13 +19,12 @@ #include "hantro_hw.h" #include "hantro_v4l2.h" -static void set_params(struct hantro_ctx *ctx) +static void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) { const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; const struct v4l2_ctrl_h264_sps *sps = ctrls->sps; const struct v4l2_ctrl_h264_pps *pps = ctrls->pps; - struct vb2_v4l2_buffer *src_buf = hantro_get_src_buf(ctx); struct hantro_dev *vpu = ctx->dev; u32 reg; @@ -226,22 +225,20 @@ static void set_ref(struct hantro_ctx *ctx) } } -static void set_buffers(struct hantro_ctx *ctx) +static void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) { const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; - struct vb2_v4l2_buffer *src_buf, *dst_buf; + struct vb2_v4l2_buffer *dst_buf; struct hantro_dev *vpu = ctx->dev; dma_addr_t src_dma, dst_dma; size_t offset = 0; - src_buf = hantro_get_src_buf(ctx); - dst_buf = hantro_get_dst_buf(ctx); - /* Source (stream) buffer. */ src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); vdpu_write_relaxed(vpu, src_dma, G1_REG_ADDR_STR); /* Destination (decoded frame) buffer. 
*/ + dst_buf = hantro_get_dst_buf(ctx); dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf); /* Adjust dma addr to start at second line for bottom field */ if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) @@ -276,6 +273,7 @@ static void set_buffers(struct hantro_ctx *ctx) int hantro_g1_h264_dec_run(struct hantro_ctx *ctx) { struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *src_buf; int ret; /* Prepare the H264 decoder context. */ @@ -284,9 +282,10 @@ int hantro_g1_h264_dec_run(struct hantro_ctx *ctx) return ret; /* Configure hardware registers. */ - set_params(ctx); + src_buf = hantro_get_src_buf(ctx); + set_params(ctx, src_buf); set_ref(ctx); - set_buffers(ctx); + set_buffers(ctx, src_buf); hantro_end_prepare_run(ctx); diff --git a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c index 2afd5996d75f..6180b23e7d94 100644 --- a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c @@ -367,13 +367,12 @@ static void cfg_tap(struct hantro_ctx *ctx, } static void cfg_ref(struct hantro_ctx *ctx, - const struct v4l2_ctrl_vp8_frame *hdr) + const struct v4l2_ctrl_vp8_frame *hdr, + struct vb2_v4l2_buffer *vb2_dst) { struct hantro_dev *vpu = ctx->dev; - struct vb2_v4l2_buffer *vb2_dst; dma_addr_t ref; - vb2_dst = hantro_get_dst_buf(ctx); ref = hantro_get_ref(ctx, hdr->last_frame_ts); if (!ref) { @@ -405,16 +404,14 @@ static void cfg_ref(struct hantro_ctx *ctx, } static void cfg_buffers(struct hantro_ctx *ctx, - const struct v4l2_ctrl_vp8_frame *hdr) + const struct v4l2_ctrl_vp8_frame *hdr, + struct vb2_v4l2_buffer *vb2_dst) { const struct v4l2_vp8_segment *seg = &hdr->segment; struct hantro_dev *vpu = ctx->dev; - struct vb2_v4l2_buffer *vb2_dst; dma_addr_t dst_dma; u32 reg; - vb2_dst = hantro_get_dst_buf(ctx); - /* Set probability table buffer address */ vdpu_write_relaxed(vpu, ctx->vp8_dec.prob_tbl.dma, G1_REG_ADDR_QTABLE); @@ -436,6 +433,7 
@@ int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx) { const struct v4l2_ctrl_vp8_frame *hdr; struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *vb2_dst; size_t height = ctx->dst_fmt.height; size_t width = ctx->dst_fmt.width; u32 mb_width, mb_height; @@ -499,8 +497,10 @@ int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx) cfg_qp(ctx, hdr); cfg_parts(ctx, hdr); cfg_tap(ctx, hdr); - cfg_ref(ctx, hdr); - cfg_buffers(ctx, hdr); + + vb
[PATCH 12/12] media: hantro: Add support for the Rockchip PX30
From: Paul Kocialkowski The PX30 SoC includes both the VDPU2 and VEPU2 blocks which are similar to the RK3399 (Hantro G1/H1 with shuffled registers). Signed-off-by: Paul Kocialkowski Signed-off-by: Ezequiel Garcia --- drivers/staging/media/hantro/hantro_drv.c | 1 + drivers/staging/media/hantro/hantro_hw.h | 1 + .../staging/media/hantro/rockchip_vpu_hw.c| 28 +++ 3 files changed, 30 insertions(+) diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c index 9b5415176bfe..8a2edd67f2c6 100644 --- a/drivers/staging/media/hantro/hantro_drv.c +++ b/drivers/staging/media/hantro/hantro_drv.c @@ -582,6 +582,7 @@ static const struct v4l2_file_operations hantro_fops = { static const struct of_device_id of_hantro_match[] = { #ifdef CONFIG_VIDEO_HANTRO_ROCKCHIP + { .compatible = "rockchip,px30-vpu", .data = &px30_vpu_variant, }, { .compatible = "rockchip,rk3036-vpu", .data = &rk3036_vpu_variant, }, { .compatible = "rockchip,rk3066-vpu", .data = &rk3066_vpu_variant, }, { .compatible = "rockchip,rk3288-vpu", .data = &rk3288_vpu_variant, }, diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index 9296624654a6..df7b5e3a57b9 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -209,6 +209,7 @@ enum hantro_enc_fmt { extern const struct hantro_variant imx8mq_vpu_g2_variant; extern const struct hantro_variant imx8mq_vpu_variant; +extern const struct hantro_variant px30_vpu_variant; extern const struct hantro_variant rk3036_vpu_variant; extern const struct hantro_variant rk3066_vpu_variant; extern const struct hantro_variant rk3288_vpu_variant; diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c index e4e3b5e7689b..e7f56e30b4a8 100644 --- a/drivers/staging/media/hantro/rockchip_vpu_hw.c +++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c @@ -16,6 +16,7 @@ #define RK3066_ACLK_MAX_FREQ (300 * 
1000 * 1000) #define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000) +#define PX30_ACLK_MAX_FREQ (300 * 1000 * 1000) /* * Supported formats. @@ -279,6 +280,12 @@ static int rockchip_vpu_hw_init(struct hantro_dev *vpu) return 0; } +static int px30_vpu_hw_init(struct hantro_dev *vpu) +{ + clk_set_rate(vpu->clocks[0].clk, PX30_ACLK_MAX_FREQ); + return 0; +} + static void rk3066_vpu_dec_reset(struct hantro_ctx *ctx) { struct hantro_dev *vpu = ctx->dev; @@ -452,6 +459,10 @@ static const char * const rockchip_vpu_clk_names[] = { "aclk", "hclk" }; +static const char * const px30_clk_names[] = { + "aclk", "hclk" +}; + /* VDPU1/VEPU1 */ const struct hantro_variant rk3036_vpu_variant = { @@ -548,3 +559,20 @@ const struct hantro_variant rk3399_vpu_variant = { .clk_names = rockchip_vpu_clk_names, .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) }; + +const struct hantro_variant px30_vpu_variant = { + .enc_offset = 0x0, + .enc_fmts = rockchip_vpu_enc_fmts, + .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts), + .dec_offset = 0x400, + .dec_fmts = rk3399_vpu_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts), + .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER | +HANTRO_VP8_DECODER | HANTRO_H264_DECODER, + .codec_ops = rk3399_vpu_codec_ops, + .irqs = rockchip_vpu2_irqs, + .num_irqs = ARRAY_SIZE(rockchip_vpu2_irqs), + .init = px30_vpu_hw_init, + .clk_names = px30_clk_names, + .num_clocks = ARRAY_SIZE(px30_clk_names) +}; -- 2.30.0
Re: [PATCH 2/6] KVM: mmu: also return page from gfn_to_pfn
Excerpts from David Stevens's message of June 24, 2021 1:57 pm: > From: David Stevens > > Return a struct kvm_pfn_page containing both a pfn and an optional > struct page from the gfn_to_pfn family of functions. This differentiates > the gup and follow_fault_pfn cases, which allows callers that only need > a pfn to avoid touching the page struct in the latter case. For callers > that need a struct page, introduce a helper function that unwraps a > struct kvm_pfn_page into a struct page. This helper makes the call to > kvm_get_pfn which had previously been in hva_to_pfn_remapped. > > For now, wrap all calls to gfn_to_pfn functions in the new helper > function. Callers which don't need the page struct will be updated in > follow-up patches. Hmm. You mean callers that do need the page will be updated? Normally if there will be leftover users that don't need the struct page then you would go the other way and keep the old call the same, and add a new one (gfn_to_pfn_page) just for those that need it. Most kernel code I look at passes back multiple values by updating pointers to struct or variables rather than returning a struct, I suppose that's not really a big deal and a matter of taste. Thanks, Nick
[PATCH 06/12] media: hantro: h264: Move DPB valid and long-term bitmaps
In order to reuse these bitmaps, move this process to struct hantro_h264_dec_hw_ctx. This will be used by the Rockchip VDPU2 H.264 driver. Signed-off-by: Ezequiel Garcia --- .../staging/media/hantro/hantro_g1_h264_dec.c | 17 ++--- drivers/staging/media/hantro/hantro_h264.c | 13 + drivers/staging/media/hantro/hantro_hw.h| 4 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c index 2aa37baad0c3..6faacfc44c7c 100644 --- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c @@ -129,25 +129,12 @@ static void set_ref(struct hantro_ctx *ctx) struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; const u8 *b0_reflist, *b1_reflist, *p_reflist; struct hantro_dev *vpu = ctx->dev; - u32 dpb_longterm = 0; - u32 dpb_valid = 0; int reg_num; u32 reg; int i; - /* -* Set up bit maps of valid and long term DPBs. -* NOTE: The bits are reversed, i.e. MSb is DPB 0. -*/ - for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) { - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) - dpb_valid |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); - - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - dpb_longterm |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); - } - vdpu_write_relaxed(vpu, dpb_valid << 16, G1_REG_VALID_REF); - vdpu_write_relaxed(vpu, dpb_longterm << 16, G1_REG_LT_REF); + vdpu_write_relaxed(vpu, ctx->h264_dec.dpb_valid, G1_REG_VALID_REF); + vdpu_write_relaxed(vpu, ctx->h264_dec.dpb_longterm, G1_REG_LT_REF); /* * Set up reference frame picture numbers. 
diff --git a/drivers/staging/media/hantro/hantro_h264.c b/drivers/staging/media/hantro/hantro_h264.c index ed6eaf11d96f..6d72136760e7 100644 --- a/drivers/staging/media/hantro/hantro_h264.c +++ b/drivers/staging/media/hantro/hantro_h264.c @@ -229,12 +229,25 @@ static void prepare_table(struct hantro_ctx *ctx) const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu; const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; + u32 dpb_longterm = 0; + u32 dpb_valid = 0; int i; for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) { tbl->poc[i * 2] = dpb[i].top_field_order_cnt; tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt; + + /* +* Set up bit maps of valid and long term DPBs. +* NOTE: The bits are reversed, i.e. MSb is DPB 0. +*/ + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) + dpb_valid |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + dpb_longterm |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); } + ctx->h264_dec.dpb_valid = dpb_valid << 16; + ctx->h264_dec.dpb_longterm = dpb_longterm << 16; tbl->poc[32] = dec_param->top_field_order_cnt; tbl->poc[33] = dec_param->bottom_field_order_cnt; diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index 5dcf65805396..ce678fedaad6 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -89,12 +89,16 @@ struct hantro_h264_dec_reflists { * @dpb: DPB * @reflists: P/B0/B1 reflists * @ctrls: V4L2 controls attached to a run + * @dpb_longterm: DPB long-term + * @dpb_valid: DPB valid */ struct hantro_h264_dec_hw_ctx { struct hantro_aux_buf priv; struct v4l2_h264_dpb_entry dpb[HANTRO_H264_DPB_SIZE]; struct hantro_h264_dec_reflists reflists; struct hantro_h264_dec_ctrls ctrls; + u32 dpb_longterm; + u32 dpb_valid; }; /** -- 2.30.0
Re: [PATCH 0/6] KVM: Remove uses of struct page from x86 and arm64 MMU
Excerpts from David Stevens's message of June 24, 2021 1:57 pm: > KVM supports mapping VM_IO and VM_PFNMAP memory into the guest by using > follow_pte in gfn_to_pfn. However, the resolved pfns may not have > assoicated struct pages, so they should not be passed to pfn_to_page. > This series removes such calls from the x86 and arm64 secondary MMU. To > do this, this series modifies gfn_to_pfn to return a struct page in > addition to a pfn, if the hva was resolved by gup. This allows the > caller to call put_page only when necessated by gup. > > This series provides a helper function that unwraps the new return type > of gfn_to_pfn to provide behavior identical to the old behavior. As I > have no hardware to test powerpc/mips changes, the function is used > there for minimally invasive changes. Additionally, as gfn_to_page and > gfn_to_pfn_cache are not integrated with mmu notifier, they cannot be > easily changed over to only use pfns. > > This addresses CVE-2021-22543 on x86 and arm64. Does this fix the problem? (untested I don't have a POC setup at hand, but at least in concept) I have no problem with improving the API and probably in the direction of your series is good. But there seems to be a lot of unfixed arch code and broken APIs remaining left to do after your series too. This might be most suitable to backport and as a base for your series that can take more time to convert to new APIs. Thanks, Nick --- diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a6bc7af0e28..e208c279d903 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2104,13 +2104,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * Whoever called remap_pfn_range is also going to call e.g. * unmap_mapping_range before the underlying pages are freed, * causing a call to our MMU notifier. 
+* +* Certain IO or PFNMAP mappings can be backed with valid +* struct pages, but be allocated without refcounting e.g., +* tail pages of non-compound higher order allocations, which +* would then underflow the refcount when the caller does the +* required put_page. Don't allow those pages here. */ - kvm_get_pfn(pfn); + if (!kvm_try_get_pfn(pfn)) + r = -EFAULT; out: pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; - return 0; + + return r; } /* @@ -2487,6 +2495,13 @@ void kvm_set_pfn_accessed(kvm_pfn_t pfn) } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); +static int kvm_try_get_pfn(kvm_pfn_t pfn) +{ + if (kvm_is_reserved_pfn(pfn)) + return 1; + return get_page_unless_zero(pfn_to_page(pfn)); +} + void kvm_get_pfn(kvm_pfn_t pfn) { if (!kvm_is_reserved_pfn(pfn))
Re: [PATCH 0/6] KVM: Remove uses of struct page from x86 and arm64 MMU
Excerpts from Nicholas Piggin's message of June 24, 2021 8:34 pm: > Excerpts from David Stevens's message of June 24, 2021 1:57 pm: >> KVM supports mapping VM_IO and VM_PFNMAP memory into the guest by using >> follow_pte in gfn_to_pfn. However, the resolved pfns may not have >> assoicated struct pages, so they should not be passed to pfn_to_page. >> This series removes such calls from the x86 and arm64 secondary MMU. To >> do this, this series modifies gfn_to_pfn to return a struct page in >> addition to a pfn, if the hva was resolved by gup. This allows the >> caller to call put_page only when necessated by gup. >> >> This series provides a helper function that unwraps the new return type >> of gfn_to_pfn to provide behavior identical to the old behavior. As I >> have no hardware to test powerpc/mips changes, the function is used >> there for minimally invasive changes. Additionally, as gfn_to_page and >> gfn_to_pfn_cache are not integrated with mmu notifier, they cannot be >> easily changed over to only use pfns. >> >> This addresses CVE-2021-22543 on x86 and arm64. > > Does this fix the problem? (untested I don't have a POC setup at hand, > but at least in concept) This one actually compiles at least. Unfortunately I don't have much time in the near future to test, and I only just found out about this CVE a few hours ago. --- It's possible to create a region which maps valid but non-refcounted pages (e.g., tail pages of non-compound higher order allocations). These host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family of APIs, which take a reference to the page, which takes it from 0 to 1. When the reference is dropped, this will free the page incorrectly. Fix this by only taking a reference on the page if it was non-zero, which indicates it is participating in normal refcounting (and can be released with put_page). 
--- virt/kvm/kvm_main.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a6bc7af0e28..46fb042837d2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2055,6 +2055,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) return true; } +static int kvm_try_get_pfn(kvm_pfn_t pfn) +{ + if (kvm_is_reserved_pfn(pfn)) + return 1; + return get_page_unless_zero(pfn_to_page(pfn)); +} + static int hva_to_pfn_remapped(struct vm_area_struct *vma, unsigned long addr, bool *async, bool write_fault, bool *writable, @@ -2104,13 +2111,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * Whoever called remap_pfn_range is also going to call e.g. * unmap_mapping_range before the underlying pages are freed, * causing a call to our MMU notifier. +* +* Certain IO or PFNMAP mappings can be backed with valid +* struct pages, but be allocated without refcounting e.g., +* tail pages of non-compound higher order allocations, which +* would then underflow the refcount when the caller does the +* required put_page. Don't allow those pages here. */ - kvm_get_pfn(pfn); + if (!kvm_try_get_pfn(pfn)) + r = -EFAULT; out: pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; - return 0; + + return r; } /* -- 2.23.0
[PATCH 03/12] hantro: vp8: Move noisy WARN_ON to vpu_debug
When the VP8 decoders can't find a reference frame, the driver falls back to the current output frame. This will probably produce some undesirable results, leading to frame corruption, but shouldn't cause noisy warnings. Signed-off-by: Ezequiel Garcia Acked-by: Nicolas Dufresne --- drivers/staging/media/hantro/hantro_g1_vp8_dec.c| 13 ++--- .../staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c | 13 ++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c index 96622a7f8279..2afd5996d75f 100644 --- a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c @@ -376,12 +376,17 @@ static void cfg_ref(struct hantro_ctx *ctx, vb2_dst = hantro_get_dst_buf(ctx); ref = hantro_get_ref(ctx, hdr->last_frame_ts); - if (!ref) + if (!ref) { + vpu_debug(0, "failed to find last frame ts=%llu\n", + hdr->last_frame_ts); ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + } vdpu_write_relaxed(vpu, ref, G1_REG_ADDR_REF(0)); ref = hantro_get_ref(ctx, hdr->golden_frame_ts); - WARN_ON(!ref && hdr->golden_frame_ts); + if (!ref && hdr->golden_frame_ts) + vpu_debug(0, "failed to find golden frame ts=%llu\n", + hdr->golden_frame_ts); if (!ref) ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN) @@ -389,7 +394,9 @@ static void cfg_ref(struct hantro_ctx *ctx, vdpu_write_relaxed(vpu, ref, G1_REG_ADDR_REF(4)); ref = hantro_get_ref(ctx, hdr->alt_frame_ts); - WARN_ON(!ref && hdr->alt_frame_ts); + if (!ref && hdr->alt_frame_ts) + vpu_debug(0, "failed to find alt frame ts=%llu\n", + hdr->alt_frame_ts); if (!ref) ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT) diff --git a/drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c b/drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c index 951b55f58a61..704607511b57 
100644 --- a/drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c +++ b/drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c @@ -453,12 +453,17 @@ static void cfg_ref(struct hantro_ctx *ctx, vb2_dst = hantro_get_dst_buf(ctx); ref = hantro_get_ref(ctx, hdr->last_frame_ts); - if (!ref) + if (!ref) { + vpu_debug(0, "failed to find last frame ts=%llu\n", + hdr->last_frame_ts); ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + } vdpu_write_relaxed(vpu, ref, VDPU_REG_VP8_ADDR_REF0); ref = hantro_get_ref(ctx, hdr->golden_frame_ts); - WARN_ON(!ref && hdr->golden_frame_ts); + if (!ref && hdr->golden_frame_ts) + vpu_debug(0, "failed to find golden frame ts=%llu\n", + hdr->golden_frame_ts); if (!ref) ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN) @@ -466,7 +471,9 @@ static void cfg_ref(struct hantro_ctx *ctx, vdpu_write_relaxed(vpu, ref, VDPU_REG_VP8_ADDR_REF2_5(2)); ref = hantro_get_ref(ctx, hdr->alt_frame_ts); - WARN_ON(!ref && hdr->alt_frame_ts); + if (!ref && hdr->alt_frame_ts) + vpu_debug(0, "failed to find alt frame ts=%llu\n", + hdr->alt_frame_ts); if (!ref) ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT) -- 2.30.0
[PATCH 01/12] drm/panel: kd35t133: Add panel orientation support
Parse the device tree rotation specifier, and set a DRM connector orientation property. The property can then be read by compositors to apply hardware plane rotation or a GPU transform. Signed-off-by: Ezequiel Garcia --- drivers/gpu/drm/panel/panel-elida-kd35t133.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c b/drivers/gpu/drm/panel/panel-elida-kd35t133.c index fe5ac3ef9018..5987d28c874c 100644 --- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c +++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c @@ -39,6 +39,7 @@ struct kd35t133 { struct device *dev; struct drm_panel panel; + enum drm_panel_orientation orientation; struct gpio_desc *reset_gpio; struct regulator *vdd; struct regulator *iovcc; @@ -216,6 +217,7 @@ static int kd35t133_get_modes(struct drm_panel *panel, connector->display_info.width_mm = mode->width_mm; connector->display_info.height_mm = mode->height_mm; drm_mode_probed_add(connector, mode); + drm_connector_set_panel_orientation(connector, ctx->orientation); return 1; } @@ -258,6 +260,12 @@ static int kd35t133_probe(struct mipi_dsi_device *dsi) return ret; } + ret = of_drm_get_panel_orientation(dev->of_node, &ctx->orientation); + if (ret) { + dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, ret); + return ret; + } + mipi_dsi_set_drvdata(dsi, ctx); ctx->dev = dev; -- 2.30.0
[PATCH 00/12] hantro: Enable H.264 VDPU2 (Odroid Advance Go)
This series adds support for H.264 decoding on the PX30, RK3328 and RK3326 platforms, enabling the VDPU2 core. Given it's tested on the Odroid Advance Go, patches 1 and 2 add the basic support to report the panel orientation to userspace (Heiko, if you like them, feel free to pick them). Weston (for instance) picks up the orientation automagically and rotates the render. Patches 3 and 4 are just low-hanging fruit that was on my backlog. Patches 5, 6 and 7 add some helpers to avoid duplicating some processes between Hantro G1 and VDPU2. Patches 8 and 9 enable the VDPU2 H.264. The implementation is based on a patch from Jonas Karlman [1], which I forward-ported to mainline. Finally, patches 10 to 12 add support for the VPU on Rockchip PX30 SoCs. These patches are based on patches submitted by Paul Kocialkowski [2], which I ported and adjusted a bit. Tested on i.MX8MQ EVK and RK3326 Odroid Advance Go, the latter is able to decode a 1080p sample at ~100fps nicely. Fluster conformance testing is looking good as well, and producing expected results: RK3326: Ran 135 tests in 480.067s FAILED (failures=9, errors=54) i.MX8MQ: Ran 135 tests in 337.491s FAILED (failures=9, errors=54) [1] https://lore.kernel.org/linux-media/he1pr06mb40119de07d38060f531d1070ac...@he1pr06mb4011.eurprd06.prod.outlook.com/ [2] https://lore.kernel.org/patchwork/cover/1361795/ Ezequiel Garcia (8): drm/panel: kd35t133: Add panel orientation support arm64: dts: rockchip: Add panel orientation to Odroid Go Advance hantro: vp8: Move noisy WARN_ON to vpu_debug hantro: Make struct hantro_variant.init() optional media: hantro: Avoid redundant hantro_get_{dst,src}_buf() calls media: hantro: h264: Move DPB valid and long-term bitmaps media: hantro: h264: Move reference picture number to a helper media: hantro: Enable H.264 on Rockchip VDPU2 Jonas Karlman (1): media: hantro: Add H.264 support for Rockchip VDPU2 Paul Kocialkowski (3): dt-bindings: media: rockchip-vpu: Add PX30 compatible arm64: dts: rockchip: 
Add VPU support for the PX30 media: hantro: Add support for the Rockchip PX30 .../bindings/media/rockchip-vpu.yaml | 3 + arch/arm64/boot/dts/rockchip/px30.dtsi| 23 + .../boot/dts/rockchip/rk3326-odroid-go2.dts | 1 + drivers/gpu/drm/panel/panel-elida-kd35t133.c | 8 + drivers/staging/media/hantro/Makefile | 1 + drivers/staging/media/hantro/hantro.h | 4 +- drivers/staging/media/hantro/hantro_drv.c | 11 +- .../staging/media/hantro/hantro_g1_h264_dec.c | 48 +- .../staging/media/hantro/hantro_g1_vp8_dec.c | 31 +- drivers/staging/media/hantro/hantro_h264.c| 24 + drivers/staging/media/hantro/hantro_hw.h | 8 + .../media/hantro/rockchip_vpu2_hw_h264_dec.c | 491 ++ .../media/hantro/rockchip_vpu2_hw_vp8_dec.c | 32 +- .../staging/media/hantro/rockchip_vpu_hw.c| 54 +- .../staging/media/hantro/sama5d4_vdec_hw.c| 6 - 15 files changed, 671 insertions(+), 74 deletions(-) create mode 100644 drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c -- 2.30.0
Re: [PATCH 3/6] KVM: x86/mmu: avoid struct page in MMU
Excerpts from Marc Zyngier's message of June 24, 2021 8:06 pm: > On Thu, 24 Jun 2021 09:58:00 +0100, > Nicholas Piggin wrote: >> >> Excerpts from David Stevens's message of June 24, 2021 1:57 pm: >> > From: David Stevens >> > out_unlock: >> >if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) >> >read_unlock(&vcpu->kvm->mmu_lock); >> >else >> >write_unlock(&vcpu->kvm->mmu_lock); >> > - kvm_release_pfn_clean(pfn); >> > + if (pfnpg.page) >> > + put_page(pfnpg.page); >> >return r; >> > } >> >> How about >> >> kvm_release_pfn_page_clean(pfnpg); > > I'm not sure. I always found kvm_release_pfn_clean() ugly, because it > doesn't mark the page 'clean'. I find put_page() more correct. > > Something like 'kvm_put_pfn_page()' would make more sense, but I'm so > bad at naming things that I could just as well call it 'bob()'. That seems like a fine name to me. A little better than bob. Thanks, Nick
Re: [PATCH 02/12] arm64: dts: rockchip: Add panel orientation to Odroid Go Advance
Am Donnerstag, 24. Juni 2021, 20:26:02 CEST schrieb Ezequiel Garcia: > The Odroid Go Advance panel is rotated, so let's reflect this > in the device tree. > > Signed-off-by: Ezequiel Garcia similar patch already applied for 5.14: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts?id=edb39de5d731f147c7b08c4a5eb246ae1dbdd947 > --- > arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts > b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts > index 49c97f76df77..cca19660e60a 100644 > --- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts > +++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts > @@ -240,6 +240,7 @@ panel@0 { > iovcc-supply = <&vcc_lcd>; > reset-gpios = <&gpio3 RK_PC0 GPIO_ACTIVE_LOW>; > vdd-supply = <&vcc_lcd>; > + rotation = <270>; > > port { > mipi_in_panel: endpoint { >
Re: [PATCH 01/12] drm/panel: kd35t133: Add panel orientation support
Am Donnerstag, 24. Juni 2021, 20:26:01 CEST schrieb Ezequiel Garcia: > Parse the device tree rotation specifier, and set a DRM > connector orientation property. The property can then be read > by compositors to apply hardware plane rotation or a GPU transform. > > Signed-off-by: Ezequiel Garcia similar patch already applied for 5.14: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/drivers/gpu/drm/panel/panel-elida-kd35t133.c?id=610d9c311b1387f8c4ac602fee1f2a1cb0508707 > --- > drivers/gpu/drm/panel/panel-elida-kd35t133.c | 8 > 1 file changed, 8 insertions(+) > > diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c > b/drivers/gpu/drm/panel/panel-elida-kd35t133.c > index fe5ac3ef9018..5987d28c874c 100644 > --- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c > +++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c > @@ -39,6 +39,7 @@ > struct kd35t133 { > struct device *dev; > struct drm_panel panel; > + enum drm_panel_orientation orientation; > struct gpio_desc *reset_gpio; > struct regulator *vdd; > struct regulator *iovcc; > @@ -216,6 +217,7 @@ static int kd35t133_get_modes(struct drm_panel *panel, > connector->display_info.width_mm = mode->width_mm; > connector->display_info.height_mm = mode->height_mm; > drm_mode_probed_add(connector, mode); > + drm_connector_set_panel_orientation(connector, ctx->orientation); > > return 1; > } > @@ -258,6 +260,12 @@ static int kd35t133_probe(struct mipi_dsi_device *dsi) > return ret; > } > > + ret = of_drm_get_panel_orientation(dev->of_node, &ctx->orientation); > + if (ret) { > + dev_err(dev, "%pOF: failed to get orientation %d\n", > dev->of_node, ret); > + return ret; > + } > + > mipi_dsi_set_drvdata(dsi, ctx); > > ctx->dev = dev; >
[PATCH 4/4] drm/i915/gem: Migrate to system at dma-buf map time
Until we support p2p dma or as a complement to that, migrate data to system memory at dma-buf map time if possible. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 616c3a2f1baf..a52f885bc09a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -25,7 +25,14 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme struct scatterlist *src, *dst; int ret, i; - ret = i915_gem_object_pin_pages_unlocked(obj); + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + return ERR_PTR(ret); + + ret = i915_gem_object_migrate(obj, NULL, INTEL_REGION_SMEM); + if (!ret) + ret = i915_gem_object_pin_pages(obj); + i915_gem_object_unlock(obj); if (ret) goto err; -- 2.31.1
[PATCH 3/4] drm/i915/display: Migrate objects to LMEM if possible for display
Objects intended to be used as display framebuffers must reside in LMEM for discrete. If they happen to not do that, migrate them to LMEM before pinning. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/display/intel_display.c | 5 - drivers/gpu/drm/i915/gem/i915_gem_domain.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 21 drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 -- 4 files changed, 5 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4524dbfa5e42..83a4aba54d67 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1331,6 +1331,9 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, ret = i915_gem_object_lock(obj, &ww); if (!ret && phys_cursor) ret = i915_gem_object_attach_phys(obj, alignment); + else if (!ret && HAS_LMEM(dev_priv)) + ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM); + /* TODO: Do we need to sync when migration becomes async? 
*/ if (!ret) ret = i915_gem_object_pin_pages(obj); if (ret) @@ -11770,7 +11773,7 @@ intel_user_framebuffer_create(struct drm_device *dev, /* object is backed with LMEM for discrete */ i915 = to_i915(obj->base.dev); - if (HAS_LMEM(i915) && !i915_gem_object_validates_to_lmem(obj)) { + if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) { /* object is "remote", not in local memory */ i915_gem_object_put(obj); return ERR_PTR(-EREMOTE); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 073822100da7..7d1400b13429 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -375,7 +375,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - /* Frame buffer must be in LMEM (no migration yet) */ + /* Frame buffer must be in LMEM */ if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 41d5182cd367..be1d122574af 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -23,27 +23,6 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, return io_mapping_map_wc(&obj->mm.region->iomap, offset, size); } -/** - * i915_gem_object_validates_to_lmem - Whether the object is resident in - * lmem when pages are present. - * @obj: The object to check. - * - * Migratable objects residency may change from under us if the object is - * not pinned or locked. This function is intended to be used to check whether - * the object can only reside in lmem when pages are present. - * - * Return: Whether the object is always resident in lmem when pages are - * present. 
- */ -bool i915_gem_object_validates_to_lmem(struct drm_i915_gem_object *obj) -{ - struct intel_memory_region *mr = READ_ONCE(obj->mm.region); - - return !i915_gem_object_migratable(obj) && - mr && (mr->type == INTEL_MEMORY_LOCAL || - mr->type == INTEL_MEMORY_STOLEN_LOCAL); -} - /** * i915_gem_object_is_lmem - Whether the object is resident in * lmem diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 8cbd7a5334e2..d423d8cac4f2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -597,8 +597,6 @@ bool i915_gem_object_evictable(struct drm_i915_gem_object *obj); bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); -bool i915_gem_object_validates_to_lmem(struct drm_i915_gem_object *obj); - int i915_gem_object_migrate(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, enum intel_region_id id); -- 2.31.1
[PATCH 2/4] drm/i915/gem: Introduce a selftest for the gem object migrate functionality
From: Matthew Auld A selftest for the gem object migrate functionality. Slightly adapted from the original by Matthew to the new interface and new fill blit code. Co-developed-by: Thomas Hellström Signed-off-by: Thomas Hellström Signed-off-by: Matthew Auld --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 1 + .../drm/i915/gem/selftests/i915_gem_migrate.c | 237 ++ .../drm/i915/selftests/i915_live_selftests.h | 1 + 3 files changed, 239 insertions(+) create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 6421c3a8b2f3..24f4395bf387 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -644,6 +644,7 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/huge_gem_object.c" #include "selftests/huge_pages.c" +#include "selftests/i915_gem_migrate.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c new file mode 100644 index ..a437b66f64d9 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020-2021 Intel Corporation + */ + +#include "gt/intel_migrate.h" + +static int igt_smem_create_migrate(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + int err = 0; + + /* Switch object backing-store on create */ + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) { + err = 
-EINVAL; + continue; + } + + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + if (err) + continue; + + if (i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) + err = -EINVAL; + + i915_gem_object_unpin_pages(obj); + } + i915_gem_object_put(obj); + + return err; +} + +static int igt_lmem_create_migrate(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + int err = 0; + + /* Switch object backing-store on create */ + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) { + err = -EINVAL; + continue; + } + + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + if (err) + continue; + + if (i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) + err = -EINVAL; + + i915_gem_object_unpin_pages(obj); + } + i915_gem_object_put(obj); + + return err; +} + +static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj) +{ + int err; + + err = i915_gem_object_lock(obj, ww); + if (err) + return err; + + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_PRIORITY | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + if (i915_gem_object_is_lmem(obj)) { + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) { + pr_err("object can't migrate to smem.\n"); + return -EINVAL; + } + + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); + if (err) { + pr_err("Object failed migration to smem\n"); + if (err) +
[PATCH 1/4] drm/i915/gem: Implement object migration
Introduce an interface to migrate objects between regions. This is primarily intended to migrate objects to LMEM for display and to SYSTEM for dma-buf, but might be reused in one form or another for performance-based migration. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 91 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 12 +++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 9 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 69 ++ drivers/gpu/drm/i915/gem/i915_gem_wait.c | 19 5 files changed, 183 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 07e8ff9a8aae..6421c3a8b2f3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -513,6 +513,97 @@ bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) return obj->mem_flags & I915_BO_FLAG_IOMEM; } +/** + * i915_gem_object_can_migrate - Whether an object likely can be migrated + * + * @obj: The object to migrate + * @id: The region intended to migrate to + * + * Check whether the object backend supports migration to the + * given region. Note that pinning may affect the ability to migrate. + * + * Return: true if migration is possible, false otherwise. 
+ */ +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, +enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int num_allowed = obj->mm.n_placements; + struct intel_memory_region *mr; + unsigned int i; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + + if (!obj->ops->migrate) + return -EOPNOTSUPP; + + mr = i915->mm.regions[id]; + if (obj->mm.region == mr) + return true; + + if (!i915_gem_object_evictable(obj)) + return false; + + if (!(obj->flags & I915_BO_ALLOC_USER)) + return true; + + if (num_allowed == 0) + return false; + + for (i = 0; i < num_allowed; ++i) { + if (mr == obj->mm.placements[i]) + return true; + } + + return false; +} + +/** + * i915_gem_object_migrate - Migrate an object to the desired region id + * @obj: The object to migrate. + * @ww: An optional struct i915_gem_ww_ctx. If NULL, the backend may + * not be successful in evicting other objects to make room for this object. + * @id: The region id to migrate to. + * + * Attempt to migrate the object to the desired memory region. The + * object backend must support migration and the object may not be + * pinned, (explicitly pinned pages or pinned vmas). The object must + * be locked. + * On successful completion, the object will have pages pointing to + * memory in the new region, but an async migration task may not have + * completed yet, and to accomplish that, i915_gem_object_wait_migration() + * must be called. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance + * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and + * -EBUSY if the object is pinned. 
+ */ +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mr; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + assert_object_held(obj); + + mr = i915->mm.regions[id]; + if (obj->mm.region == mr) + return 0; + + if (!i915_gem_object_evictable(obj)) + return -EBUSY; + + if (!obj->ops->migrate) + return -EOPNOTSUPP; + + return obj->ops->migrate(obj, mr); +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index ea3224a480c4..8cbd7a5334e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -17,6 +17,8 @@ #include "i915_gem_ww.h" #include "i915_vma_types.h" +enum intel_region_id; + /* * XXX: There is a prevalence of the assumption that we fit the * object's page count inside a 32bit _signed_ variable. Let's document @@ -597,6 +599,16 @@ bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); bool i915_gem_object_validates_to_lmem(struct drm_i915_gem_object *obj); +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, +
[PATCH 0/4] drm/i915/gem: Introduce a migrate interface
We want to be able to explicitly migrate objects between gem memory regions, initially for display and dma-buf, but there might be more use-cases coming up. Introduce a gem migrate interface, add a selftest and use it for display fb pinning and dma-buf mapping. This series should make the desktop light up on DG1 with DG1-enabled mesa. Matthew Auld (1): drm/i915/gem: Introduce a selftest for the gem object migrate functionality Thomas Hellström (3): drm/i915/gem: Implement object migration drm/i915/display: Migrate objects to LMEM if possible for display drm/i915/gem: Migrate to system at dma-buf map time drivers/gpu/drm/i915/display/intel_display.c | 5 +- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 9 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 2 +- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 21 -- drivers/gpu/drm/i915/gem/i915_gem_object.c| 92 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 12 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 9 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 69 +++-- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 19 ++ .../drm/i915/gem/selftests/i915_gem_migrate.c | 237 ++ .../drm/i915/selftests/i915_live_selftests.h | 1 + 11 files changed, 434 insertions(+), 42 deletions(-) create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c -- 2.31.1
Re: [PATCH 01/11] drm/sched: Split drm_sched_job_init
On Thu, Jun 24, 2021 at 7:39 PM Christian König wrote: > > > > Am 24.06.21 um 19:37 schrieb Daniel Vetter: > > On Thu, Jun 24, 2021 at 7:30 PM Christian König > > wrote: > >> Am 24.06.21 um 16:00 schrieb Daniel Vetter: > >>> This is a very confusingly named function, because not just does it > >>> init an object, it arms it and provides a point of no return for > >>> pushing a job into the scheduler. It would be nice if that's a bit > >>> clearer in the interface. > >> We originally had that in the push_job interface, but moved that to init > >> for some reason I don't remember. > >> > >>> But the real reason is that I want to push the dependency tracking > >>> helpers into the scheduler code, and that means drm_sched_job_init > >>> must be called a lot earlier, without arming the job. > >> I'm really questioning myself if I like that naming. > >> > >> What about using drm_sched_job_add_dependency instead? > > You're suggesting a > > s/drm_sched_job_init/drm_sched_job_add_dependency/, or just replied to > > the wrong patch? > > Replied to the wrong patch accidentally. I was talking about the "await" > terminology. Can you pls reply there so we don't have too much of a confusion in the discussion? Thanks, Daniel > Christian. > > > -Daniel > > > >> Christian. 
> >> > >>> Signed-off-by: Daniel Vetter > >>> Cc: Lucas Stach > >>> Cc: Russell King > >>> Cc: Christian Gmeiner > >>> Cc: Qiang Yu > >>> Cc: Rob Herring > >>> Cc: Tomeu Vizoso > >>> Cc: Steven Price > >>> Cc: Alyssa Rosenzweig > >>> Cc: David Airlie > >>> Cc: Daniel Vetter > >>> Cc: Sumit Semwal > >>> Cc: "Christian König" > >>> Cc: Masahiro Yamada > >>> Cc: Kees Cook > >>> Cc: Adam Borowski > >>> Cc: Nick Terrell > >>> Cc: Mauro Carvalho Chehab > >>> Cc: Paul Menzel > >>> Cc: Sami Tolvanen > >>> Cc: Viresh Kumar > >>> Cc: Alex Deucher > >>> Cc: Dave Airlie > >>> Cc: Nirmoy Das > >>> Cc: Deepak R Varma > >>> Cc: Lee Jones > >>> Cc: Kevin Wang > >>> Cc: Chen Li > >>> Cc: Luben Tuikov > >>> Cc: "Marek Olšák" > >>> Cc: Dennis Li > >>> Cc: Maarten Lankhorst > >>> Cc: Andrey Grodzovsky > >>> Cc: Sonny Jiang > >>> Cc: Boris Brezillon > >>> Cc: Tian Tao > >>> Cc: Jack Zhang > >>> Cc: etna...@lists.freedesktop.org > >>> Cc: l...@lists.freedesktop.org > >>> Cc: linux-me...@vger.kernel.org > >>> Cc: linaro-mm-...@lists.linaro.org > >>> --- > >>>.gitignore | 1 + > >>>drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ > >>>drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ > >>>drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ > >>>drivers/gpu/drm/lima/lima_sched.c| 2 ++ > >>>drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ > >>>drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- > >>>drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- > >>>drivers/gpu/drm/scheduler/sched_main.c | 23 ++- > >>>include/drm/gpu_scheduler.h | 6 +- > >>>10 files changed, 51 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/.gitignore b/.gitignore > >>> index 7afd412dadd2..52433a930299 100644 > >>> --- a/.gitignore > >>> +++ b/.gitignore > >>> @@ -66,6 +66,7 @@ modules.order > >>>/modules.builtin > >>>/modules.builtin.modinfo > >>>/modules.nsdeps > >>> +*.builtin > >>> > >>># > >>># RPM spec file (make rpm-pkg) > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > >>> 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > >>> index c5386d13eb4a..a4ec092af9a7 100644 > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > >>> @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser > >>> *p, > >>>if (r) > >>>goto error_unlock; > >>> > >>> + drm_sched_job_arm(&job->base); > >>> + > >>>/* No memory allocation is allowed while holding the notifier lock. > >>> * The lock is held until amdgpu_cs_submit is finished and fence is > >>> * added to BOs. > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > >>> index d33e6d97cc89..5ddb955d2315 100644 > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > >>> @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct > >>> drm_sched_entity *entity, > >>>if (r) > >>>return r; > >>> > >>> + drm_sched_job_arm(&job->base); > >>> + > >>>*f = dma_fence_get(&job->base.s_fence->finished); > >>>amdgpu_job_free_resources(job); > >>>drm_sched_entity_push_job(&job->base, entity); > >>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >>> b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >>> index 19826e504efc..af1671f01c7f 100644 > >>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >>> @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(s
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
On Thu, Jun 24, 2021 at 7:56 PM Christian König wrote: > > Am 24.06.21 um 19:43 schrieb Daniel Vetter: > > On Thu, Jun 24, 2021 at 7:38 PM Christian König > > wrote: > >> Am 24.06.21 um 19:29 schrieb Daniel Vetter: > >>> On Thu, Jun 24, 2021 at 07:03:10PM +0200, Christian König wrote: > Am 24.06.21 um 16:00 schrieb Daniel Vetter: > > This is essentially part of drm_sched_dependency_optimized(), which > > only amdgpu seems to make use of. Use it a bit more. > > > > Signed-off-by: Daniel Vetter > > Cc: "Christian König" > > Cc: Daniel Vetter > > Cc: Luben Tuikov > > Cc: Andrey Grodzovsky > > Cc: Alex Deucher > > Cc: Jack Zhang > > --- > > drivers/gpu/drm/scheduler/sched_main.c | 7 +++ > > 1 file changed, 7 insertions(+) > > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c > > b/drivers/gpu/drm/scheduler/sched_main.c > > index 370c336d383f..c31d7cf7df74 100644 > > --- a/drivers/gpu/drm/scheduler/sched_main.c > > +++ b/drivers/gpu/drm/scheduler/sched_main.c > > @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job > > *job, > > if (!fence) > > return 0; > > + /* if it's a fence from us it's guaranteed to be earlier */ > > + if (fence->context == job->entity->fence_context || > > + fence->context == job->entity->fence_context + 1) { > > + dma_fence_put(fence); > > + return 0; > > + } > > + > Well NAK. That would break Vulkan. > > I'm assuming your reply means the NAK is retracted and was just the > > usual "this doesn't perfectly fit for amdgpu" reflex? > > Well rather "NAK, you haven't considered that special handling in amdgpu > and if you really want to unify this you need that as well." > > > > The problem is that Vulkan can insert dependencies between jobs which > run on > the same queue. > > So we need to track those as well and if the previous job for the same > queue/scheduler is not yet finished a pipeline synchronization needs to > be > inserted. > > That's one of the reasons we wasn't able to unify the dependency handling > yet. 
> >>> That sounds like an extremely amdgpu specific constraint? > >> Yeah, that's totally hardware specific. > >> > >> It's just that I don't know how else we could track that without having > >> the same separation as in amdgpu between implicit and explicit fences. > >> And as far as I understand it that's exactly what you want to avoid. > >> > >> As I said this turned out to be really awkward. > >> > >>> You're also the > >>> only one who keeps track of whether the previous job we've scheduled has > >>> finished already (I guess they can get pipelined and you don't flush by > >>> default), so you insert fences. > >> Yes, exactly that. > >> > >>> I guess we can add a await_fence_no_dedup or so for amdgpu, but I'm not > >>> sure why we have to inflict this design constraint on all other drivers? > >>> At least I'm not seeing anything in lima, panfrost, v3d or entaviv that > >>> would break with this, and i915 will also be perfectly fine. > >>> > >>> Also note: I'm not using this for amdgpu, exactly because there's a few > >>> funny things going on. > >> Yeah, exactly the reason why we never unified this. > > Yeah there's clear limits to this, because you also can't use the > > await_implicit helper, because you have to keep filtering for owner or > > the current amdgpu uapi goes horribly slow. I think the benefit would > > be just that we could share the datastructure and the book-keeping, > > but aside from that you'd need your own integration in amdgpu. > > Yeah, but that is trivial. The _add_dependency() function (or however we > want to call it) needs to be exported anyway for adding fences from > syncfile and syncobj. > > Or do you also want to unify the handling for those? I guess we could add some convenience wrapper that pulls in a sync_file or sync_objc automatically. But there's not that much code involved there, and it's also not tricky. Also drivers might need to add dependencies for whatever anyway. 
The await_implicit is a bit different, because that defines how implicit sync is supposed to work. I guess the bikeshed then boils down to which one is the simple await_fence() function. The one that filters for same timeline, or the one that doesn't. I'd make the non-filtering one the special case so that amdgpu sticks out a bit more - out of 6 drivers with schedulers (i915 included) it seems to be the special one. > > One idea I just had was whether we could use the tag bits xarray has > > for the amdgpu purposed. Like we could do a > > drm_sched_job_await_fence_tagged, where you supply additional > > information (like the "this might be relevant for the vm_flush" and > > things like that). Afaiui xarray tags are very fast to enumerate on if > > you're looking for specific tags, but I
[Bug 213561] [bisected][regression] GFX10 AMDGPUs can no longer enter idle state after commit. Commit has been pushed to stable branches too.
https://bugzilla.kernel.org/show_bug.cgi?id=213561 --- Comment #8 from hagar-du...@wanadoo.fr --- Thanks for pointing to a different commit. I don't really have the time currently to revert a specific commit to try it out, pointing out the problem happening between two consecutive kernel versions should be enough TBH for the author to know what this is about. I don't mind filing another bug if you insist, it would be nice to have the dev show up here and state if that's necessary; the problem might not affect the same hwid, but it's basically identical, I wouldn't be surprised if I open a bug the dev decides it's a duplicate. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH] dma-buf/sync_file: Don't leak fences on merge failure
Am 24.06.21 um 19:47 schrieb Jason Ekstrand: Each add_fence() call does a dma_fence_get() on the relevant fence. In the error path, we weren't calling dma_fence_put() so all those fences got leaked. Also, in the krealloc_array failure case, we weren't freeing the fences array. Instead, ensure that i and fences are always zero-initialized and dma_fence_put() all the fences and kfree(fences) on every error path. Signed-off-by: Jason Ekstrand Fixes: a02b9dc90d84 ("dma-buf/sync_file: refactor fence storage in struct sync_file") Cc: Gustavo Padovan Cc: Christian König Reviewed-by: Christian König --- drivers/dma-buf/sync_file.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 20d9bddbb985b..394e6e1e96860 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -211,8 +211,8 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { struct sync_file *sync_file; - struct dma_fence **fences, **nfences, **a_fences, **b_fences; - int i, i_a, i_b, num_fences, a_num_fences, b_num_fences; + struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences; + int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences; sync_file = sync_file_alloc(); if (!sync_file) @@ -236,7 +236,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, * If a sync_file can only be created with sync_file_merge * and sync_file_create, this is a reasonable assumption. 
*/ - for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { + for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { struct dma_fence *pt_a = a_fences[i_a]; struct dma_fence *pt_b = b_fences[i_b]; @@ -277,15 +277,16 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, fences = nfences; } - if (sync_file_set_fence(sync_file, fences, i) < 0) { - kfree(fences); + if (sync_file_set_fence(sync_file, fences, i) < 0) goto err; - } strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; err: + while (i) + dma_fence_put(fences[--i]); + kfree(fences); fput(sync_file->file); return NULL;
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
Am 24.06.21 um 19:43 schrieb Daniel Vetter: On Thu, Jun 24, 2021 at 7:38 PM Christian König wrote: Am 24.06.21 um 19:29 schrieb Daniel Vetter: On Thu, Jun 24, 2021 at 07:03:10PM +0200, Christian König wrote: Am 24.06.21 um 16:00 schrieb Daniel Vetter: This is essentially part of drm_sched_dependency_optimized(), which only amdgpu seems to make use of. Use it a bit more. Signed-off-by: Daniel Vetter Cc: "Christian König" Cc: Daniel Vetter Cc: Luben Tuikov Cc: Andrey Grodzovsky Cc: Alex Deucher Cc: Jack Zhang --- drivers/gpu/drm/scheduler/sched_main.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 370c336d383f..c31d7cf7df74 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job *job, if (!fence) return 0; + /* if it's a fence from us it's guaranteed to be earlier */ + if (fence->context == job->entity->fence_context || + fence->context == job->entity->fence_context + 1) { + dma_fence_put(fence); + return 0; + } + Well NAK. That would break Vulkan. I'm assuming your reply means the NAK is retracted and was just the usual "this doesn't perfectly fit for amdgpu" reflex? Well rather "NAK, you haven't considered that special handling in amdgpu and if you really want to unify this you need that as well." The problem is that Vulkan can insert dependencies between jobs which run on the same queue. So we need to track those as well and if the previous job for the same queue/scheduler is not yet finished a pipeline synchronization needs to be inserted. That's one of the reasons we wasn't able to unify the dependency handling yet. That sounds like an extremely amdgpu specific constraint? Yeah, that's totally hardware specific. It's just that I don't know how else we could track that without having the same separation as in amdgpu between implicit and explicit fences. 
And as far as I understand it that's exactly what you want to avoid. As I said this turned out to be really awkward. You're also the only one who keeps track of whether the previous job we've scheduled has finished already (I guess they can get pipelined and you don't flush by default), so you insert fences. Yes, exactly that. I guess we can add a await_fence_no_dedup or so for amdgpu, but I'm not sure why we have to inflict this design constraint on all other drivers? At least I'm not seeing anything in lima, panfrost, v3d or entaviv that would break with this, and i915 will also be perfectly fine. Also note: I'm not using this for amdgpu, exactly because there's a few funny things going on. Yeah, exactly the reason why we never unified this. Yeah there's clear limits to this, because you also can't use the await_implicit helper, because you have to keep filtering for owner or the current amdgpu uapi goes horribly slow. I think the benefit would be just that we could share the datastructure and the book-keeping, but aside from that you'd need your own integration in amdgpu. Yeah, but that is trivial. The _add_dependency() function (or however we want to call it) needs to be exported anyway for adding fences from syncfile and syncobj. Or do you also want to unify the handling for those? One idea I just had was whether we could use the tag bits xarray has for the amdgpu purposed. Like we could do a drm_sched_job_await_fence_tagged, where you supply additional information (like the "this might be relevant for the vm_flush" and things like that). Afaiui xarray tags are very fast to enumerate on if you're looking for specific tags, but I might be wrong. Ideally this would avoid the need for the duplicated amdgpu_job->sched. That could work. Essentially we just need the information from the scheduler which is the last fence which was dependency optimized. In other words when you push jobs like those to the same scheduler J1 J2 -> depends on J1. 
J3 -> depends on whatever, but not j2 The hardware needs to insert a flush between J2 and J1, but not between j3 and j2. This makes roughly 19% performance difference for some OpenGL games and incorrect rendering for Vulkan if you mess it up either way or the other. Regards, Christian. Cheers, Daniel Regards, Christian. Finally: You _really_ need explicit dependency handling for vulkan in your uapi, instead of the kernel second-guessing what userspace might be doing. That's really not how vulkan is designed to work :-) Cheers, Daniel Christian. /* Deduplicate if we already depend on a fence from the same context. * This lets the size of the array of deps scale with the number of * engines involved, rather than the number of BOs.
[PATCH] dma-buf/sync_file: Don't leak fences on merge failure
Each add_fence() call does a dma_fence_get() on the relevant fence. In the error path, we weren't calling dma_fence_put() so all those fences got leaked. Also, in the krealloc_array failure case, we weren't freeing the fences array. Instead, ensure that i and fences are always zero-initialized and dma_fence_put() all the fences and kfree(fences) on every error path. Signed-off-by: Jason Ekstrand Fixes: a02b9dc90d84 ("dma-buf/sync_file: refactor fence storage in struct sync_file") Cc: Gustavo Padovan Cc: Christian König --- drivers/dma-buf/sync_file.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 20d9bddbb985b..394e6e1e96860 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -211,8 +211,8 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { struct sync_file *sync_file; - struct dma_fence **fences, **nfences, **a_fences, **b_fences; - int i, i_a, i_b, num_fences, a_num_fences, b_num_fences; + struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences; + int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences; sync_file = sync_file_alloc(); if (!sync_file) @@ -236,7 +236,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, * If a sync_file can only be created with sync_file_merge * and sync_file_create, this is a reasonable assumption. 
*/ - for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { + for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { struct dma_fence *pt_a = a_fences[i_a]; struct dma_fence *pt_b = b_fences[i_b]; @@ -277,15 +277,16 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, fences = nfences; } - if (sync_file_set_fence(sync_file, fences, i) < 0) { - kfree(fences); + if (sync_file_set_fence(sync_file, fences, i) < 0) goto err; - } strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; err: + while (i) + dma_fence_put(fences[--i]); + kfree(fences); fput(sync_file->file); return NULL; -- 2.31.1
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
On Thu, Jun 24, 2021 at 7:38 PM Christian König wrote: > > Am 24.06.21 um 19:29 schrieb Daniel Vetter: > > On Thu, Jun 24, 2021 at 07:03:10PM +0200, Christian König wrote: > >> Am 24.06.21 um 16:00 schrieb Daniel Vetter: > >>> This is essentially part of drm_sched_dependency_optimized(), which > >>> only amdgpu seems to make use of. Use it a bit more. > >>> > >>> Signed-off-by: Daniel Vetter > >>> Cc: "Christian König" > >>> Cc: Daniel Vetter > >>> Cc: Luben Tuikov > >>> Cc: Andrey Grodzovsky > >>> Cc: Alex Deucher > >>> Cc: Jack Zhang > >>> --- > >>>drivers/gpu/drm/scheduler/sched_main.c | 7 +++ > >>>1 file changed, 7 insertions(+) > >>> > >>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c > >>> b/drivers/gpu/drm/scheduler/sched_main.c > >>> index 370c336d383f..c31d7cf7df74 100644 > >>> --- a/drivers/gpu/drm/scheduler/sched_main.c > >>> +++ b/drivers/gpu/drm/scheduler/sched_main.c > >>> @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job > >>> *job, > >>> if (!fence) > >>> return 0; > >>> + /* if it's a fence from us it's guaranteed to be earlier */ > >>> + if (fence->context == job->entity->fence_context || > >>> + fence->context == job->entity->fence_context + 1) { > >>> + dma_fence_put(fence); > >>> + return 0; > >>> + } > >>> + > >> Well NAK. That would break Vulkan. I'm assuming your reply means the NAK is retracted and was just the usual "this doesn't perfectly fit for amdgpu" reflex? > >> The problem is that Vulkan can insert dependencies between jobs which run > >> on > >> the same queue. > >> > >> So we need to track those as well and if the previous job for the same > >> queue/scheduler is not yet finished a pipeline synchronization needs to be > >> inserted. > >> > >> That's one of the reasons we wasn't able to unify the dependency handling > >> yet. > > That sounds like an extremely amdgpu specific constraint? > > Yeah, that's totally hardware specific. 
> > It's just that I don't know how else we could track that without having > the same separation as in amdgpu between implicit and explicit fences. > And as far as I understand it that's exactly what you want to avoid. > > As I said this turned out to be really awkward. > > > You're also the > > only one who keeps track of whether the previous job we've scheduled has > > finished already (I guess they can get pipelined and you don't flush by > > default), so you insert fences. > > Yes, exactly that. > > > I guess we can add a await_fence_no_dedup or so for amdgpu, but I'm not > > sure why we have to inflict this design constraint on all other drivers? > > At least I'm not seeing anything in lima, panfrost, v3d or entaviv that > > would break with this, and i915 will also be perfectly fine. > > > > Also note: I'm not using this for amdgpu, exactly because there's a few > > funny things going on. > > Yeah, exactly the reason why we never unified this. Yeah there's clear limits to this, because you also can't use the await_implicit helper, because you have to keep filtering for owner or the current amdgpu uapi goes horribly slow. I think the benefit would be just that we could share the datastructure and the book-keeping, but aside from that you'd need your own integration in amdgpu. One idea I just had was whether we could use the tag bits xarray has for the amdgpu purposed. Like we could do a drm_sched_job_await_fence_tagged, where you supply additional information (like the "this might be relevant for the vm_flush" and things like that). Afaiui xarray tags are very fast to enumerate on if you're looking for specific tags, but I might be wrong. Ideally this would avoid the need for the duplicated amdgpu_job->sched. Cheers, Daniel > Regards, > Christian. > > > Finally: You _really_ need explicit dependency handling for vulkan in your > > uapi, instead of the kernel second-guessing what userspace might be doing. 
> > That's really not how vulkan is designed to work :-) > > > > > Cheers, Daniel > > > > > >> Christian. > >> > >>> /* Deduplicate if we already depend on a fence from the same context. > >>> * This lets the size of the array of deps scale with the number of > >>> * engines involved, rather than the number of BOs. > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Re: [PATCH 01/11] drm/sched: Split drm_sched_job_init
Am 24.06.21 um 19:37 schrieb Daniel Vetter: On Thu, Jun 24, 2021 at 7:30 PM Christian König wrote: Am 24.06.21 um 16:00 schrieb Daniel Vetter: This is a very confusingly named function, because not just does it init an object, it arms it and provides a point of no return for pushing a job into the scheduler. It would be nice if that's a bit clearer in the interface. We originally had that in the push_job interface, but moved that to init for some reason I don't remember. But the real reason is that I want to push the dependency tracking helpers into the scheduler code, and that means drm_sched_job_init must be called a lot earlier, without arming the job. I'm really questioning myself if I like that naming. What about using drm_sched_job_add_dependency instead? You're suggesting a s/drm_sched_job_init/drm_sched_job_add_dependency/, or just replied to the wrong patch? Replied to the wrong patch accidentally. I was talking about the "await" terminology. Christian. -Daniel Christian. Signed-off-by: Daniel Vetter Cc: Lucas Stach Cc: Russell King Cc: Christian Gmeiner Cc: Qiang Yu Cc: Rob Herring Cc: Tomeu Vizoso Cc: Steven Price Cc: Alyssa Rosenzweig Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Masahiro Yamada Cc: Kees Cook Cc: Adam Borowski Cc: Nick Terrell Cc: Mauro Carvalho Chehab Cc: Paul Menzel Cc: Sami Tolvanen Cc: Viresh Kumar Cc: Alex Deucher Cc: Dave Airlie Cc: Nirmoy Das Cc: Deepak R Varma Cc: Lee Jones Cc: Kevin Wang Cc: Chen Li Cc: Luben Tuikov Cc: "Marek Olšák" Cc: Dennis Li Cc: Maarten Lankhorst Cc: Andrey Grodzovsky Cc: Sonny Jiang Cc: Boris Brezillon Cc: Tian Tao Cc: Jack Zhang Cc: etna...@lists.freedesktop.org Cc: l...@lists.freedesktop.org Cc: linux-me...@vger.kernel.org Cc: linaro-mm-...@lists.linaro.org --- .gitignore | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ drivers/gpu/drm/lima/lima_sched.c| 2 ++ 
drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- drivers/gpu/drm/scheduler/sched_main.c | 23 ++- include/drm/gpu_scheduler.h | 6 +- 10 files changed, 51 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 7afd412dadd2..52433a930299 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,7 @@ modules.order /modules.builtin /modules.builtin.modinfo /modules.nsdeps +*.builtin # # RPM spec file (make rpm-pkg) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c5386d13eb4a..a4ec092af9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; + drm_sched_job_arm(&job->base); + /* No memory allocation is allowed while holding the notifier lock. * The lock is held until amdgpu_cs_submit is finished and fence is * added to BOs. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index d33e6d97cc89..5ddb955d2315 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; + drm_sched_job_arm(&job->base); + *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); drm_sched_entity_push_job(&job->base, entity); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 19826e504efc..af1671f01c7f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity, if (ret) goto out_unlock; + drm_sched_job_arm(&submit->sched_job); + submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, submit->out_fence, 0, diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index ecf3267334ff..bd1af1fd8c0f 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -129,6 +129,8 @@ int lima_sched_task_init(struct lima_sched_task *task, return err; } + drm_sched_job_arm(&task->base); + task->num_bos = num_bos; task->vm = lima_vm_get(vm); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
Am 24.06.21 um 19:29 schrieb Daniel Vetter: On Thu, Jun 24, 2021 at 07:03:10PM +0200, Christian König wrote: Am 24.06.21 um 16:00 schrieb Daniel Vetter: This is essentially part of drm_sched_dependency_optimized(), which only amdgpu seems to make use of. Use it a bit more. Signed-off-by: Daniel Vetter Cc: "Christian König" Cc: Daniel Vetter Cc: Luben Tuikov Cc: Andrey Grodzovsky Cc: Alex Deucher Cc: Jack Zhang --- drivers/gpu/drm/scheduler/sched_main.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 370c336d383f..c31d7cf7df74 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job *job, if (!fence) return 0; + /* if it's a fence from us it's guaranteed to be earlier */ + if (fence->context == job->entity->fence_context || + fence->context == job->entity->fence_context + 1) { + dma_fence_put(fence); + return 0; + } + Well NAK. That would break Vulkan. The problem is that Vulkan can insert dependencies between jobs which run on the same queue. So we need to track those as well and if the previous job for the same queue/scheduler is not yet finished a pipeline synchronization needs to be inserted. That's one of the reasons we wasn't able to unify the dependency handling yet. That sounds like an extremely amdgpu specific constraint? Yeah, that's totally hardware specific. It's just that I don't know how else we could track that without having the same separation as in amdgpu between implicit and explicit fences. And as far as I understand it that's exactly what you want to avoid. As I said this turned out to be really awkward. You're also the only one who keeps track of whether the previous job we've scheduled has finished already (I guess they can get pipelined and you don't flush by default), so you insert fences. Yes, exactly that. 
I guess we can add an await_fence_no_dedup or so for amdgpu, but I'm not sure why we have to inflict this design constraint on all other drivers? At least I'm not seeing anything in lima, panfrost, v3d or etnaviv that would break with this, and i915 will also be perfectly fine. Also note: I'm not using this for amdgpu, exactly because there's a few funny things going on. Yeah, exactly the reason why we never unified this. Regards, Christian. Finally: You _really_ need explicit dependency handling for vulkan in your uapi, instead of the kernel second-guessing what userspace might be doing. That's really not how vulkan is designed to work :-) Cheers, Daniel Christian. /* Deduplicate if we already depend on a fence from the same context. * This lets the size of the array of deps scale with the number of * engines involved, rather than the number of BOs.
Re: [PATCH 01/11] drm/sched: Split drm_sched_job_init
On Thu, Jun 24, 2021 at 7:30 PM Christian König wrote: > > Am 24.06.21 um 16:00 schrieb Daniel Vetter: > > This is a very confusingly named function, because not just does it > > init an object, it arms it and provides a point of no return for > > pushing a job into the scheduler. It would be nice if that's a bit > > clearer in the interface. > > We originally had that in the push_job interface, but moved that to init > for some reason I don't remember. > > > But the real reason is that I want to push the dependency tracking > > helpers into the scheduler code, and that means drm_sched_job_init > > must be called a lot earlier, without arming the job. > > I'm really questioning myself if I like that naming. > > What about using drm_sched_job_add_dependency instead? You're suggesting a s/drm_sched_job_init/drm_sched_job_add_dependency/, or just replied to the wrong patch? -Daniel > > Christian. > > > > > Signed-off-by: Daniel Vetter > > Cc: Lucas Stach > > Cc: Russell King > > Cc: Christian Gmeiner > > Cc: Qiang Yu > > Cc: Rob Herring > > Cc: Tomeu Vizoso > > Cc: Steven Price > > Cc: Alyssa Rosenzweig > > Cc: David Airlie > > Cc: Daniel Vetter > > Cc: Sumit Semwal > > Cc: "Christian König" > > Cc: Masahiro Yamada > > Cc: Kees Cook > > Cc: Adam Borowski > > Cc: Nick Terrell > > Cc: Mauro Carvalho Chehab > > Cc: Paul Menzel > > Cc: Sami Tolvanen > > Cc: Viresh Kumar > > Cc: Alex Deucher > > Cc: Dave Airlie > > Cc: Nirmoy Das > > Cc: Deepak R Varma > > Cc: Lee Jones > > Cc: Kevin Wang > > Cc: Chen Li > > Cc: Luben Tuikov > > Cc: "Marek Olšák" > > Cc: Dennis Li > > Cc: Maarten Lankhorst > > Cc: Andrey Grodzovsky > > Cc: Sonny Jiang > > Cc: Boris Brezillon > > Cc: Tian Tao > > Cc: Jack Zhang > > Cc: etna...@lists.freedesktop.org > > Cc: l...@lists.freedesktop.org > > Cc: linux-me...@vger.kernel.org > > Cc: linaro-mm-...@lists.linaro.org > > --- > > .gitignore | 1 + > > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ > > 
drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ > > drivers/gpu/drm/lima/lima_sched.c| 2 ++ > > drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ > > drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- > > drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- > > drivers/gpu/drm/scheduler/sched_main.c | 23 ++- > > include/drm/gpu_scheduler.h | 6 +- > > 10 files changed, 51 insertions(+), 10 deletions(-) > > > > diff --git a/.gitignore b/.gitignore > > index 7afd412dadd2..52433a930299 100644 > > --- a/.gitignore > > +++ b/.gitignore > > @@ -66,6 +66,7 @@ modules.order > > /modules.builtin > > /modules.builtin.modinfo > > /modules.nsdeps > > +*.builtin > > > > # > > # RPM spec file (make rpm-pkg) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > index c5386d13eb4a..a4ec092af9a7 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser > > *p, > > if (r) > > goto error_unlock; > > > > + drm_sched_job_arm(&job->base); > > + > > /* No memory allocation is allowed while holding the notifier lock. > >* The lock is held until amdgpu_cs_submit is finished and fence is > >* added to BOs. 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > index d33e6d97cc89..5ddb955d2315 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > > @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct > > drm_sched_entity *entity, > > if (r) > > return r; > > > > + drm_sched_job_arm(&job->base); > > + > > *f = dma_fence_get(&job->base.s_fence->finished); > > amdgpu_job_free_resources(job); > > drm_sched_entity_push_job(&job->base, entity); > > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > index 19826e504efc..af1671f01c7f 100644 > > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > > @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity > > *sched_entity, > > if (ret) > > goto out_unlock; > > > > + drm_sched_job_arm(&submit->sched_job); > > + > > submit->out_fence = > > dma_fence_get(&submit->sched_job.s_fence->finished); > > submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, > > submit->out_fence, 0, > > diff --git a/drivers/gpu/drm/lima/lima_sched.c > > b/drivers/gpu/drm/lima/lima_sched.c > > index ecf3267334ff..bd1af1fd8c0f 100644 > > --- a/drivers/gpu/drm/lima/lima_sched.c > > +++ b/drivers/gpu/drm/lima/
Re: [Intel-gfx] [PATCH 05/47] drm/i915/guc: Add stall timer to non blocking CTB send function
On 24.06.2021 09:04, Matthew Brost wrote: > Implement a stall timer which fails H2G CTBs once a period of time > with no forward progress is reached to prevent deadlock. > > Also update to ct_write to return -EIO rather than -EPIPE on a > corrupted descriptor. by doing so you will have the same error code for two different problems: a) corrupted CTB descriptor (definitely unrecoverable) b) long stall in CTB processing (still recoverable) while caller is explicitly instructed to retry only on: c) temporary stall in CTB processing (likely recoverable) so why do we want to limit our diagnostics? > > Signed-off-by: John Harrison > Signed-off-by: Daniele Ceraolo Spurio > Signed-off-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 47 +-- > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 ++ > 2 files changed, 48 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > index c9a65d05911f..27ec30b5ef47 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > @@ -319,6 +319,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) > goto err_deregister; > > ct->enabled = true; > + ct->stall_time = KTIME_MAX; > > return 0; > > @@ -392,7 +393,7 @@ static int ct_write(struct intel_guc_ct *ct, > unsigned int i; > > if (unlikely(ctb->broken)) > - return -EPIPE; > + return -EIO; > > if (unlikely(desc->status)) > goto corrupted; > @@ -464,7 +465,7 @@ static int ct_write(struct intel_guc_ct *ct, > CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", >desc->head, desc->tail, desc->status); > ctb->broken = true; > - return -EPIPE; > + return -EIO; > } > > /** > @@ -507,6 +508,18 @@ static int wait_for_ct_request_update(struct ct_request > *req, u32 *status) > return err; > } > > +#define GUC_CTB_TIMEOUT_MS 1500 it's 150% of core CTB timeout, maybe we should correlate them ? 
> +static inline bool ct_deadlocked(struct intel_guc_ct *ct) > +{ > + long timeout = GUC_CTB_TIMEOUT_MS; > + bool ret = ktime_ms_delta(ktime_get(), ct->stall_time) > timeout; > + > + if (unlikely(ret)) > + CT_ERROR(ct, "CT deadlocked\n"); nit: in commit message you said all these changes are to "prevent deadlock" so maybe this message should rather be: int delta = ktime_ms_delta(ktime_get(), ct->stall_time); CT_ERROR(ct, "Communication stalled for %dms\n", delta); (note that CT_ERROR already adds "CT" prefix) > + > + return ret; > +} > + > static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw) > { > struct guc_ct_buffer_desc *desc = ctb->desc; > @@ -518,6 +531,26 @@ static inline bool h2g_has_room(struct > intel_guc_ct_buffer *ctb, u32 len_dw) > return space >= len_dw; > } > > +static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw) > +{ > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > + > + lockdep_assert_held(&ct->ctbs.send.lock); > + > + if (unlikely(!h2g_has_room(ctb, len_dw))) { > + if (ct->stall_time == KTIME_MAX) > + ct->stall_time = ktime_get(); > + > + if (unlikely(ct_deadlocked(ct))) and maybe above message should be printed somewhere around here when we detect "deadlock" for the first time? 
> + return -EIO; > + else > + return -EBUSY; > + } > + > + ct->stall_time = KTIME_MAX; > + return 0; > +} > + > static int ct_send_nb(struct intel_guc_ct *ct, > const u32 *action, > u32 len, > @@ -530,7 +563,7 @@ static int ct_send_nb(struct intel_guc_ct *ct, > > spin_lock_irqsave(&ctb->lock, spin_flags); > > - ret = h2g_has_room(ctb, len + 1); > + ret = has_room_nb(ct, len + 1); > if (unlikely(ret)) > goto out; > > @@ -574,11 +607,19 @@ static int ct_send(struct intel_guc_ct *ct, > retry: > spin_lock_irqsave(&ct->ctbs.send.lock, flags); > if (unlikely(!h2g_has_room(ctb, len + 1))) { > + if (ct->stall_time == KTIME_MAX) > + ct->stall_time = ktime_get(); as this is a repeated pattern, maybe it should be moved to h2g_has_room or other wrapper ? > spin_unlock_irqrestore(&ct->ctbs.send.lock, flags); > + > + if (unlikely(ct_deadlocked(ct))) > + return -EIO; > + > cond_resched(); > goto retry; > } > > + ct->stall_time = KTIME_MAX; this one too > + > fence = ct_get_next_fence(ct); > request.fence = fence; > request.status = 0; > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > index eb69263324ba..55ef7c52472f 100644 > ---
Re: [PATCH 01/11] drm/sched: Split drm_sched_job_init
Am 24.06.21 um 16:00 schrieb Daniel Vetter: This is a very confusingly named function, because not just does it init an object, it arms it and provides a point of no return for pushing a job into the scheduler. It would be nice if that's a bit clearer in the interface. We originally had that in the push_job interface, but moved that to init for some reason I don't remember. But the real reason is that I want to push the dependency tracking helpers into the scheduler code, and that means drm_sched_job_init must be called a lot earlier, without arming the job. I'm really questioning myself if I like that naming. What about using drm_sched_job_add_dependency instead? Christian. Signed-off-by: Daniel Vetter Cc: Lucas Stach Cc: Russell King Cc: Christian Gmeiner Cc: Qiang Yu Cc: Rob Herring Cc: Tomeu Vizoso Cc: Steven Price Cc: Alyssa Rosenzweig Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Masahiro Yamada Cc: Kees Cook Cc: Adam Borowski Cc: Nick Terrell Cc: Mauro Carvalho Chehab Cc: Paul Menzel Cc: Sami Tolvanen Cc: Viresh Kumar Cc: Alex Deucher Cc: Dave Airlie Cc: Nirmoy Das Cc: Deepak R Varma Cc: Lee Jones Cc: Kevin Wang Cc: Chen Li Cc: Luben Tuikov Cc: "Marek Olšák" Cc: Dennis Li Cc: Maarten Lankhorst Cc: Andrey Grodzovsky Cc: Sonny Jiang Cc: Boris Brezillon Cc: Tian Tao Cc: Jack Zhang Cc: etna...@lists.freedesktop.org Cc: l...@lists.freedesktop.org Cc: linux-me...@vger.kernel.org Cc: linaro-mm-...@lists.linaro.org --- .gitignore | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ drivers/gpu/drm/lima/lima_sched.c| 2 ++ drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ drivers/gpu/drm/scheduler/sched_entity.c | 6 +++--- drivers/gpu/drm/scheduler/sched_fence.c | 15 ++- drivers/gpu/drm/scheduler/sched_main.c | 23 ++- include/drm/gpu_scheduler.h | 6 +- 10 files changed, 51 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 
7afd412dadd2..52433a930299 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,7 @@ modules.order /modules.builtin /modules.builtin.modinfo /modules.nsdeps +*.builtin # # RPM spec file (make rpm-pkg) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c5386d13eb4a..a4ec092af9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; + drm_sched_job_arm(&job->base); + /* No memory allocation is allowed while holding the notifier lock. * The lock is held until amdgpu_cs_submit is finished and fence is * added to BOs. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index d33e6d97cc89..5ddb955d2315 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -170,6 +170,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; + drm_sched_job_arm(&job->base); + *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); drm_sched_entity_push_job(&job->base, entity); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 19826e504efc..af1671f01c7f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -163,6 +163,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity, if (ret) goto out_unlock; + drm_sched_job_arm(&submit->sched_job); + submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr, submit->out_fence, 0, diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index ecf3267334ff..bd1af1fd8c0f 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -129,6 +129,8 @@ int 
lima_sched_task_init(struct lima_sched_task *task, return err; } + drm_sched_job_arm(&task->base); + task->num_bos = num_bos; task->vm = lima_vm_get(vm); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index beb62c8fc851..1e950534b9b0 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -244,6 +244,8 @@ int panfrost_job_push(struct panfrost_job *job) goto unlock; } + drm_sched_job_arm(&job->base); + job->render_done_fence = dma_fen
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
On Thu, Jun 24, 2021 at 07:03:10PM +0200, Christian König wrote: > Am 24.06.21 um 16:00 schrieb Daniel Vetter: > > This is essentially part of drm_sched_dependency_optimized(), which > > only amdgpu seems to make use of. Use it a bit more. > > > > Signed-off-by: Daniel Vetter > > Cc: "Christian König" > > Cc: Daniel Vetter > > Cc: Luben Tuikov > > Cc: Andrey Grodzovsky > > Cc: Alex Deucher > > Cc: Jack Zhang > > --- > > drivers/gpu/drm/scheduler/sched_main.c | 7 +++ > > 1 file changed, 7 insertions(+) > > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c > > b/drivers/gpu/drm/scheduler/sched_main.c > > index 370c336d383f..c31d7cf7df74 100644 > > --- a/drivers/gpu/drm/scheduler/sched_main.c > > +++ b/drivers/gpu/drm/scheduler/sched_main.c > > @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job > > *job, > > if (!fence) > > return 0; > > + /* if it's a fence from us it's guaranteed to be earlier */ > > + if (fence->context == job->entity->fence_context || > > + fence->context == job->entity->fence_context + 1) { > > + dma_fence_put(fence); > > + return 0; > > + } > > + > > Well NAK. That would break Vulkan. > > The problem is that Vulkan can insert dependencies between jobs which run on > the same queue. > > So we need to track those as well and if the previous job for the same > queue/scheduler is not yet finished a pipeline synchronization needs to be > inserted. > > That's one of the reasons we wasn't able to unify the dependency handling > yet. That sounds like an extremely amdgpu specific constraint? You're also the only one who keeps track of whether the previous job we've scheduled has finished already (I guess they can get pipelined and you don't flush by default), so you insert fences. I guess we can add a await_fence_no_dedup or so for amdgpu, but I'm not sure why we have to inflict this design constraint on all other drivers? 
At least I'm not seeing anything in lima, panfrost, v3d or etnaviv that would break with this, and i915 will also be perfectly fine. Also note: I'm not using this for amdgpu, exactly because there's a few funny things going on. Finally: You _really_ need explicit dependency handling for vulkan in your uapi, instead of the kernel second-guessing what userspace might be doing. That's really not how vulkan is designed to work :-) Cheers, Daniel > Christian. > > > /* Deduplicate if we already depend on a fence from the same context. > > * This lets the size of the array of deps scale with the number of > > * engines involved, rather than the number of BOs. > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Re: [Intel-gfx] [RFC PATCH 36/97] drm/i915/guc: Add non blocking CTB send function
On Thu, Jun 24, 2021 at 09:38:33AM -0700, Matthew Brost wrote: > On Thu, Jun 10, 2021 at 05:27:48PM +0200, Daniel Vetter wrote: > > On Wed, Jun 09, 2021 at 04:10:23PM -0700, Matthew Brost wrote: > > > On Tue, Jun 08, 2021 at 10:46:15AM +0200, Daniel Vetter wrote: > > > > On Tue, Jun 8, 2021 at 10:39 AM Tvrtko Ursulin > > > > wrote: > > > > > > > > > > > > > > > On 07/06/2021 18:31, Matthew Brost wrote: > > > > > > On Thu, May 27, 2021 at 04:11:50PM +0100, Tvrtko Ursulin wrote: > > > > > >> > > > > > >> On 27/05/2021 15:35, Matthew Brost wrote: > > > > > >>> On Thu, May 27, 2021 at 11:02:24AM +0100, Tvrtko Ursulin wrote: > > > > > > > > > > On 26/05/2021 19:10, Matthew Brost wrote: > > > > > > > > > > [snip] > > > > > > > > > > > +static int ct_send_nb(struct intel_guc_ct *ct, > > > > > > + const u32 *action, > > > > > > + u32 len, > > > > > > + u32 flags) > > > > > > +{ > > > > > > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > > > > > + unsigned long spin_flags; > > > > > > + u32 fence; > > > > > > + int ret; > > > > > > + > > > > > > + spin_lock_irqsave(&ctb->lock, spin_flags); > > > > > > + > > > > > > + ret = ctb_has_room(ctb, len + 1); > > > > > > + if (unlikely(ret)) > > > > > > + goto out; > > > > > > + > > > > > > + fence = ct_get_next_fence(ct); > > > > > > + ret = ct_write(ct, action, len, fence, flags); > > > > > > + if (unlikely(ret)) > > > > > > + goto out; > > > > > > + > > > > > > + intel_guc_notify(ct_to_guc(ct)); > > > > > > + > > > > > > +out: > > > > > > + spin_unlock_irqrestore(&ctb->lock, spin_flags); > > > > > > + > > > > > > + return ret; > > > > > > +} > > > > > > + > > > > > > static int ct_send(struct intel_guc_ct *ct, > > > > > > const u32 *action, > > > > > > u32 len, > > > > > > @@ -473,6 +541,7 @@ static int ct_send(struct intel_guc_ct > > > > > > *ct, > > > > > > u32 response_buf_size, > > > > > > u32 *status) > > > > > > { > > > > > > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > > > > > struct ct_request request; 
> > > > > > unsigned long flags; > > > > > > u32 fence; > > > > > > @@ -482,8 +551,20 @@ static int ct_send(struct intel_guc_ct > > > > > > *ct, > > > > > > GEM_BUG_ON(!len); > > > > > > GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); > > > > > > GEM_BUG_ON(!response_buf && > > > > > > response_buf_size); > > > > > > + might_sleep(); > > > > > > > > > > Sleep is just cond_resched below or there is more? > > > > > > > > > > >>> > > > > > >>> Yes, the cond_resched. > > > > > >>> > > > > > > + /* > > > > > > + * We use a lazy spin wait loop here as we believe > > > > > > that if the CT > > > > > > + * buffers are sized correctly the flow control > > > > > > condition should be > > > > > > + * rare. > > > > > > + */ > > > > > > +retry: > > > > > > spin_lock_irqsave(&ct->ctbs.send.lock, flags); > > > > > > + if (unlikely(!ctb_has_room(ctb, len + 1))) { > > > > > > + spin_unlock_irqrestore(&ct->ctbs.send.lock, > > > > > > flags); > > > > > > + cond_resched(); > > > > > > + goto retry; > > > > > > + } > > > > > > > > > > If this patch is about adding a non-blocking send function, > > > > > and below we can > > > > > see that it creates a fork: > > > > > > > > > > intel_guc_ct_send: > > > > > ... > > > > > if (flags & INTEL_GUC_SEND_NB) > > > > > return ct_send_nb(ct, action, len, flags); > > > > > > > > > > ret = ct_send(ct, action, len, response_buf, > > > > > response_buf_size, &status); > > > > > > > > > > Then why is there a change in ct_send here, which is not the > > > > > new > > > > > non-blocking path? > > > > > > > > > > >>> > > > > > >>> There is not a change to ct_send(), just to intel_guc_ct_send. > > > > > >> > > > > > >> I was doing by the diff which says: > > > > > >> > > > > > >> static int ct_send(struct intel_guc_ct *ct, > > > > > >> cons
Re: [PATCH 06/11] drm/v3d: Move drm_sched_job_init to v3d_job_init
On Thu, Jun 24, 2021 at 09:59:08AM -0700, Emma Anholt wrote: > On Thu, Jun 24, 2021 at 7:00 AM Daniel Vetter wrote: > > > > Prep work for using the scheduler dependency handling. I'll add "We need to call drm_sched_job_init earlier so we can use the new drm_sched_job_await* functions for dependency handling here." That gives a bit more context on what's going on here. > > > > Signed-off-by: Daniel Vetter > > Cc: Emma Anholt > > Back when I wrote this, I think there were rules that there had to be > no failure paths between a job_init and a push. Has that changed? > > I really don't have the context to evaluate this, I'm not sure what > new "scheduler dependency handling" is given that there was already > something that I considered to be dependency handling! Full patch series link: https://lore.kernel.org/dri-devel/20210624140025.438303-1-daniel.vet...@ffwll.ch/T/#t The job_init vs push_job is addressed here: https://lore.kernel.org/dri-devel/20210624140025.438303-1-daniel.vet...@ffwll.ch/T/#mb6c4d75e1c57a5056d7b2ec8fbb9839fc5be41a7 I split job_init into job_init (which can fail, and be done earlier) and job_arm (which can't fail, and must be atomic with the push_job). The entire goal of this is to lift the dependency handling from "everyone copypastes v3d" to "drm_sched has it for you already", which is this patch here: https://lore.kernel.org/dri-devel/20210624140025.438303-1-daniel.vet...@ffwll.ch/T/#m9f64aaf840cbf8815cd2ea9a68f99a51da9baa5f Cheers, Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Re: [PATCH 01/47] drm/i915/guc: Relax CTB response timeout
On 24.06.2021 09:04, Matthew Brost wrote: > In upcoming patch we will allow more CTB requests to be sent in > parallel to the GuC for processing, so we shouldn't assume any more > that GuC will always reply without 10ms. > > Use bigger value hardcoded value of 1s instead. > > v2: Add CONFIG_DRM_I915_GUC_CTB_TIMEOUT config option > v3: > (Daniel Vetter) > - Use hardcoded value of 1s rather than config option > > Signed-off-by: Matthew Brost > Cc: Michal Wajdeczko > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > index 43409044528e..a59e239497ee 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > @@ -474,14 +474,16 @@ static int wait_for_ct_request_update(struct ct_request > *req, u32 *status) > /* >* Fast commands should complete in less than 10us, so sample quickly >* up to that length of time, then switch to a slower sleep-wait loop. > - * No GuC command should ever take longer than 10ms. > + * No GuC command should ever take longer than 10ms but many GuC > + * commands can be inflight at time, so use a 1s timeout on the slower > + * sleep-wait loop. >*/ > #define done \ > (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \ >GUC_HXG_ORIGIN_GUC) > err = wait_for_us(done, 10); > if (err) > - err = wait_for(done, 10); > + err = wait_for(done, 1000); can we add #defines for these 10/1000 values? with that Reviewed-by: Michal Wajdeczko > #undef done > > if (unlikely(err)) >
Re: [PATCH 10/11] drm/scheduler: Don't store self-dependencies
Am 24.06.21 um 16:00 schrieb Daniel Vetter: This is essentially part of drm_sched_dependency_optimized(), which only amdgpu seems to make use of. Use it a bit more. Signed-off-by: Daniel Vetter Cc: "Christian König" Cc: Daniel Vetter Cc: Luben Tuikov Cc: Andrey Grodzovsky Cc: Alex Deucher Cc: Jack Zhang --- drivers/gpu/drm/scheduler/sched_main.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 370c336d383f..c31d7cf7df74 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -649,6 +649,13 @@ int drm_sched_job_await_fence(struct drm_sched_job *job, if (!fence) return 0; + /* if it's a fence from us it's guaranteed to be earlier */ + if (fence->context == job->entity->fence_context || + fence->context == job->entity->fence_context + 1) { + dma_fence_put(fence); + return 0; + } + Well NAK. That would break Vulkan. The problem is that Vulkan can insert dependencies between jobs which run on the same queue. So we need to track those as well and if the previous job for the same queue/scheduler is not yet finished a pipeline synchronization needs to be inserted. That's one of the reasons we wasn't able to unify the dependency handling yet. Christian. /* Deduplicate if we already depend on a fence from the same context. * This lets the size of the array of deps scale with the number of * engines involved, rather than the number of BOs.
Re: [Intel-gfx] [PATCH 04/47] drm/i915/guc: Add non blocking CTB send function
On 24.06.2021 17:49, Matthew Brost wrote: > On Thu, Jun 24, 2021 at 04:48:32PM +0200, Michal Wajdeczko wrote: >> >> >> On 24.06.2021 09:04, Matthew Brost wrote: >>> Add non blocking CTB send function, intel_guc_send_nb. GuC submission >>> will send CTBs in the critical path and does not need to wait for these >>> CTBs to complete before moving on, hence the need for this new function. >>> >>> The non-blocking CTB now must have a flow control mechanism to ensure >>> the buffer isn't overrun. A lazy spin wait is used as we believe the >>> flow control condition should be rare with a properly sized buffer. >>> >>> The function, intel_guc_send_nb, is exported in this patch but unused. >>> Several patches later in the series make use of this function. >>> >>> Signed-off-by: John Harrison >>> Signed-off-by: Matthew Brost >>> --- >>> drivers/gpu/drm/i915/gt/uc/intel_guc.h| 12 +++- >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 77 +-- >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 3 +- >>> 3 files changed, 82 insertions(+), 10 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> index 4abc59f6f3cd..24b1df6ad4ae 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> @@ -74,7 +74,15 @@ static inline struct intel_guc *log_to_guc(struct >>> intel_guc_log *log) >>> static >>> inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 >>> len) >>> { >>> - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); >>> +} >>> + >>> +#define INTEL_GUC_SEND_NB BIT(31) >> >> hmm, this flag really belongs to intel_guc_ct_send() so it should be >> defined as CTB flag near that function declaration >> > > I can move this up a few lines. 
> >>> +static >>> +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 >>> len) >>> +{ >>> + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, >>> +INTEL_GUC_SEND_NB); >>> } >>> >>> static inline int >>> @@ -82,7 +90,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const >>> u32 *action, u32 len, >>>u32 *response_buf, u32 response_buf_size) >>> { >>> return intel_guc_ct_send(&guc->ct, action, len, >>> -response_buf, response_buf_size); >>> +response_buf, response_buf_size, 0); >>> } >>> >>> static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> index a17215920e58..c9a65d05911f 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> @@ -3,6 +3,11 @@ >>> * Copyright © 2016-2019 Intel Corporation >>> */ >>> >>> +#include >>> +#include >>> +#include >>> +#include >>> + >>> #include "i915_drv.h" >>> #include "intel_guc_ct.h" >>> #include "gt/intel_gt.h" >>> @@ -373,7 +378,7 @@ static void write_barrier(struct intel_guc_ct *ct) >>> static int ct_write(struct intel_guc_ct *ct, >>> const u32 *action, >>> u32 len /* in dwords */, >>> - u32 fence) >>> + u32 fence, u32 flags) >>> { >>> struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; >>> struct guc_ct_buffer_desc *desc = ctb->desc; >>> @@ -421,9 +426,13 @@ static int ct_write(struct intel_guc_ct *ct, >>> FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | >>> FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); >>> >>> - hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | >>> - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | >>> -GUC_HXG_REQUEST_MSG_0_DATA0, action[0]); >>> + hxg = (flags & INTEL_GUC_SEND_NB) ? 
>>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | >>> +FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | >>> + GUC_HXG_EVENT_MSG_0_DATA0, action[0])) : >>> + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | >>> +FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | >>> + GUC_HXG_REQUEST_MSG_0_DATA0, action[0])); >> >> or as we already switched to accept and return whole HXG messages in >> guc_send_mmio() maybe we should do the same for CTB variant too and >> instead of using extra flag just let caller to prepare proper HXG header >> with HXG_EVENT type and then in CTB code just look at this type to make >> decision which code path to use >> > > Not sure I follow. Anyways could this be done in a follow up by you if > want this change. > >> note that existing callers should not be impacted, as full HXG header >> for the REQUEST message looks exactly the same as "action" code alone. >> >>> >>> CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\
Re: [PATCH 02/11] drm/sched: Add dependency tracking
Am 24.06.21 um 16:39 schrieb Lucas Stach: Am Donnerstag, dem 24.06.2021 um 16:00 +0200 schrieb Daniel Vetter: Instead of just a callback we can just glue in the gem helpers that panfrost, v3d and lima currently use. There's really not that many ways to skin this cat. On the naming bikeshed: The idea for using _await_ to denote adding dependencies to a job comes from i915, where that's used quite extensively all over the place, in lots of datastructures. Signed-off-by: Daniel Vetter Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Andrey Grodzovsky Cc: Lee Jones Cc: Nirmoy Das Cc: Boris Brezillon Cc: Luben Tuikov Cc: Alex Deucher Cc: Jack Zhang Cc: linux-me...@vger.kernel.org Cc: linaro-mm-...@lists.linaro.org --- drivers/gpu/drm/scheduler/sched_entity.c | 18 +++- drivers/gpu/drm/scheduler/sched_main.c | 103 +++ include/drm/gpu_scheduler.h | 31 ++- 3 files changed, 146 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index f7347c284886..b6f72fafd504 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -211,6 +211,19 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, job->sched->ops->free_job(job); } +static struct dma_fence * +drm_sched_job_dependency(struct drm_sched_job *job, +struct drm_sched_entity *entity) +{ + if (!xa_empty(&job->dependencies)) + return xa_erase(&job->dependencies, job->last_dependency++); Not sure how much it buys us now that you dedup fences before adding them to the xa, but we could avoid potentially avoid some ping-pong looping in the scheduler by checking if the fence we are about to return here is already signaled and skipping to the next one if so. You absolutely need this, especially for TTM based drivers since you basically need to add all the fences from all the BOs in you relocation list. 
When I initially implemented the dependency handling I've tried multiple approaches, including something similar to that one here. Not sure how well the performance will be, but I think we can revert to something more complicated rather easily when we find that it doesn't work as expected. One unresolved problem is that we need to track the last fence we optimized by looking at the scheduler instance. This is necessary since Vulkan dependencies don't work correctly otherwise. Amdgpu currently has a rather awkward workaround for that. But in general it looks like the right thing to do. Regards, Christian. Regards, Lucas + + if (job->sched->ops->dependency) + return job->sched->ops->dependency(job, entity); + + return NULL; +} + /** * drm_sched_entity_kill_jobs - Make sure all remaining jobs are killed * @@ -229,7 +242,7 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity) struct drm_sched_fence *s_fence = job->s_fence; /* Wait for all dependencies to avoid data corruptions */ - while ((f = job->sched->ops->dependency(job, entity))) + while ((f = drm_sched_job_dependency(job, entity))) dma_fence_wait(f, false); drm_sched_fence_scheduled(s_fence); @@ -419,7 +432,6 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) */ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) { - struct drm_gpu_scheduler *sched = entity->rq->sched; struct drm_sched_job *sched_job; sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); @@ -427,7 +439,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) return NULL; while ((entity->dependency = - sched->ops->dependency(sched_job, entity))) { + drm_sched_job_dependency(sched_job, entity))) { trace_drm_sched_job_wait_dep(sched_job, entity->dependency); if (drm_sched_entity_add_dependency_cb(entity)) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 
70eefed17e06..370c336d383f 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -603,6 +603,8 @@ int drm_sched_job_init(struct drm_sched_job *job, INIT_LIST_HEAD(&job->list); + xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC); + return 0; } EXPORT_SYMBOL(drm_sched_job_init); @@ -626,6 +628,98 @@ void drm_sched_job_arm(struct drm_sched_job *job) } EXPORT_SYMBOL(drm_sched_job_arm); +/** + * drm_sched_job_await_fence - adds the fence as a job dependency + * @job: scheduler job to add the dependencies to + * @fence: the dma_fence to add to the list of dependencies. + * + * Note that @fence is consumed in both the suc
Re: [Intel-gfx] [RFC PATCH 36/97] drm/i915/guc: Add non blocking CTB send function
On Thu, Jun 10, 2021 at 05:27:48PM +0200, Daniel Vetter wrote: > On Wed, Jun 09, 2021 at 04:10:23PM -0700, Matthew Brost wrote: > > On Tue, Jun 08, 2021 at 10:46:15AM +0200, Daniel Vetter wrote: > > > On Tue, Jun 8, 2021 at 10:39 AM Tvrtko Ursulin > > > wrote: > > > > > > > > > > > > On 07/06/2021 18:31, Matthew Brost wrote: > > > > > On Thu, May 27, 2021 at 04:11:50PM +0100, Tvrtko Ursulin wrote: > > > > >> > > > > >> On 27/05/2021 15:35, Matthew Brost wrote: > > > > >>> On Thu, May 27, 2021 at 11:02:24AM +0100, Tvrtko Ursulin wrote: > > > > > > > > On 26/05/2021 19:10, Matthew Brost wrote: > > > > > > > > [snip] > > > > > > > > > +static int ct_send_nb(struct intel_guc_ct *ct, > > > > > + const u32 *action, > > > > > + u32 len, > > > > > + u32 flags) > > > > > +{ > > > > > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > > > > + unsigned long spin_flags; > > > > > + u32 fence; > > > > > + int ret; > > > > > + > > > > > + spin_lock_irqsave(&ctb->lock, spin_flags); > > > > > + > > > > > + ret = ctb_has_room(ctb, len + 1); > > > > > + if (unlikely(ret)) > > > > > + goto out; > > > > > + > > > > > + fence = ct_get_next_fence(ct); > > > > > + ret = ct_write(ct, action, len, fence, flags); > > > > > + if (unlikely(ret)) > > > > > + goto out; > > > > > + > > > > > + intel_guc_notify(ct_to_guc(ct)); > > > > > + > > > > > +out: > > > > > + spin_unlock_irqrestore(&ctb->lock, spin_flags); > > > > > + > > > > > + return ret; > > > > > +} > > > > > + > > > > > static int ct_send(struct intel_guc_ct *ct, > > > > > const u32 *action, > > > > > u32 len, > > > > > @@ -473,6 +541,7 @@ static int ct_send(struct intel_guc_ct > > > > > *ct, > > > > > u32 response_buf_size, > > > > > u32 *status) > > > > > { > > > > > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > > > > struct ct_request request; > > > > > unsigned long flags; > > > > > u32 fence; > > > > > @@ -482,8 +551,20 @@ static int ct_send(struct intel_guc_ct > > > > > *ct, > > > > > GEM_BUG_ON(!len); > > > > > 
GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); > > > > > GEM_BUG_ON(!response_buf && response_buf_size); > > > > > + might_sleep(); > > > > > > > > Sleep is just cond_resched below or there is more? > > > > > > > > >>> > > > > >>> Yes, the cond_resched. > > > > >>> > > > > > + /* > > > > > + * We use a lazy spin wait loop here as we believe that > > > > > if the CT > > > > > + * buffers are sized correctly the flow control > > > > > condition should be > > > > > + * rare. > > > > > + */ > > > > > +retry: > > > > > spin_lock_irqsave(&ct->ctbs.send.lock, flags); > > > > > + if (unlikely(!ctb_has_room(ctb, len + 1))) { > > > > > + spin_unlock_irqrestore(&ct->ctbs.send.lock, > > > > > flags); > > > > > + cond_resched(); > > > > > + goto retry; > > > > > + } > > > > > > > > If this patch is about adding a non-blocking send function, > > > > and below we can > > > > see that it creates a fork: > > > > > > > > intel_guc_ct_send: > > > > ... > > > > if (flags & INTEL_GUC_SEND_NB) > > > > return ct_send_nb(ct, action, len, flags); > > > > > > > > ret = ct_send(ct, action, len, response_buf, > > > > response_buf_size, &status); > > > > > > > > Then why is there a change in ct_send here, which is not the > > > > new > > > > non-blocking path? > > > > > > > > >>> > > > > >>> There is not a change to ct_send(), just to intel_guc_ct_send. > > > > >> > > > > >> I was doing by the diff which says: > > > > >> > > > > >> static int ct_send(struct intel_guc_ct *ct, > > > > >> const u32 *action, > > > > >> u32 len, > > > > >> @@ -473,6 +541,7 @@ static int ct_send(struct intel_guc_ct *ct, > > > > >> u32 response_buf_size, > > > > >> u32 *status) > > > > >> { > > > > >> +struct intel_
Re: [PATCH 45/47] drm/i915/guc: Include scheduling policies in the debugfs state dump
On Thu, Jun 24, 2021 at 12:05:14AM -0700, Matthew Brost wrote: > From: John Harrison > > Added the scheduling policy parameters to the 'guc_info' debugfs state > dump. > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 13 + > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h | 2 ++ > drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c | 2 ++ > 3 files changed, 17 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > index c6d0b762d82c..b8182844aa00 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > @@ -92,6 +92,19 @@ static void guc_policies_init(struct intel_guc *guc, > struct guc_policies *polici > policies->is_valid = 1; > } > > +void intel_guc_log_policy_info(struct intel_guc *guc, struct drm_printer *dp) > +{ > + struct __guc_ads_blob *blob = guc->ads_blob; > + > + if (unlikely(!blob)) > + return; > + > + drm_printf(dp, "Global scheduling policies:\n"); > + drm_printf(dp, " DPC promote time = %u\n", > blob->policies.dpc_promote_time); > + drm_printf(dp, " Max num work items = %u\n", > blob->policies.max_num_work_items); > + drm_printf(dp, " Flags = %u\n", > blob->policies.global_flags); > +} > + > static int guc_action_policies_update(struct intel_guc *guc, u32 > policy_offset) > { > u32 action[] = { > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h > index b00d3ae1113a..0fdcb3583601 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h > @@ -7,9 +7,11 @@ > #define _INTEL_GUC_ADS_H_ > > struct intel_guc; > +struct drm_printer; > > int intel_guc_ads_create(struct intel_guc *guc); > void intel_guc_ads_destroy(struct intel_guc *guc); > void intel_guc_ads_reset(struct intel_guc *guc); > +void intel_guc_log_policy_info(struct intel_guc *guc, struct 
drm_printer *p); > > #endif > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c > index 62b9ce0fafaa..9a03ff56e654 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c > @@ -10,6 +10,7 @@ > #include "intel_guc_debugfs.h" > #include "intel_guc_log_debugfs.h" > #include "gt/uc/intel_guc_ct.h" > +#include "gt/uc/intel_guc_ads.h" > #include "gt/uc/intel_guc_submission.h" > > static int guc_info_show(struct seq_file *m, void *data) > @@ -29,6 +30,7 @@ static int guc_info_show(struct seq_file *m, void *data) > > intel_guc_log_ct_info(&guc->ct, &p); > intel_guc_log_submission_info(guc, &p); > + intel_guc_log_policy_info(guc, &p); > > return 0; > } > -- > 2.28.0 >
Re: [Intel-gfx] [PATCH 40/47] drm/i915/guc: Enable GuC engine reset
On Thu, Jun 24, 2021 at 12:05:09AM -0700, Matthew Brost wrote: > From: John Harrison > > Clear the 'disable resets' flag to allow GuC to reset hung contexts > (detected via pre-emption timeout). > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > index 9fd3c911f5fb..d3e86ab7508f 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > @@ -81,8 +81,7 @@ static void guc_policies_init(struct guc_policies *policies) > { > policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US; > policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI; > - /* Disable automatic resets as not yet supported. */ > - policies->global_flags = GLOBAL_POLICY_DISABLE_ENGINE_RESET; > + policies->global_flags = 0; > policies->is_valid = 1; > } > > -- > 2.28.0 > > ___ > Intel-gfx mailing list > intel-...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [PATCH v14 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing
On Thu, Jun 24, 2021 at 11:56 PM Konrad Rzeszutek Wilk wrote: > > On Thu, Jun 24, 2021 at 10:10:51AM -0400, Qian Cai wrote: > > > > > > On 6/24/2021 7:48 AM, Will Deacon wrote: > > > Ok, diff below which attempts to tackle the offset issue I mentioned as > > > well. Qian Cai -- please can you try with these changes? > > > > This works fine. > > Cool. Let me squash this patch in #6 and rebase the rest of them. > > Claire, could you check the devel/for-linus-5.14 say by end of today to > double check that I didn't mess anything up please? I just submitted v15 here (https://lore.kernel.org/patchwork/cover/1451322/) in case it's helpful. I'll double check of course. Thanks for the efforts! > > Will, > > Thank you for generating the fix! I am going to run it on x86 and Xen > to make sure all is good (granted last time I ran devel/for-linus-5.14 > on that setup I didn't see any errors so I need to double check > I didn't do something silly like run a wrong kernel). > > > > > > > > > > Will > > > > > > --->8 > > > > > > diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h > > > index 175b6c113ed8..39284ff2a6cd 100644 > > > --- a/include/linux/swiotlb.h > > > +++ b/include/linux/swiotlb.h > > > @@ -116,7 +116,9 @@ static inline bool is_swiotlb_buffer(struct device > > > *dev, phys_addr_t paddr) > > > > > > static inline bool is_swiotlb_force_bounce(struct device *dev) > > > { > > > - return dev->dma_io_tlb_mem->force_bounce; > > > + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; > > > + > > > + return mem && mem->force_bounce; > > > } > > > > > > void __init swiotlb_exit(void); > > > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c > > > index 44be8258e27b..0ffbaae9fba2 100644 > > > --- a/kernel/dma/swiotlb.c > > > +++ b/kernel/dma/swiotlb.c > > > @@ -449,6 +449,7 @@ static int swiotlb_find_slots(struct device *dev, > > > phys_addr_t orig_addr, > > > dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); > > > unsigned int nslots = nr_slots(alloc_size), stride; 
> > > unsigned int index, wrap, count = 0, i; > > > + unsigned int offset = swiotlb_align_offset(dev, orig_addr); > > > unsigned long flags; > > > > > > BUG_ON(!nslots); > > > @@ -497,7 +498,7 @@ static int swiotlb_find_slots(struct device *dev, > > > phys_addr_t orig_addr, > > > for (i = index; i < index + nslots; i++) { > > > mem->slots[i].list = 0; > > > mem->slots[i].alloc_size = > > > - alloc_size - ((i - index) << IO_TLB_SHIFT); > > > + alloc_size - (offset + ((i - index) << > > > IO_TLB_SHIFT)); > > > } > > > for (i = index - 1; > > > io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && > > >
Re: [Intel-gfx] [PATCH 39/47] drm/i915/guc: Don't complain about reset races
On Thu, Jun 24, 2021 at 12:05:08AM -0700, Matthew Brost wrote: > From: John Harrison > > It is impossible to seal all race conditions of resets occurring > concurrent to other operations. At least, not without introducing > excessive mutex locking. Instead, don't complain if it occurs. In > particular, don't complain if trying to send a H2G during a reset. > Whatever the H2G was about should get redone once the reset is over. > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 5 - > drivers/gpu/drm/i915/gt/uc/intel_uc.c | 3 +++ > drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 ++ > 3 files changed, 9 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > index dd6177c8d75c..3b32755f892e 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > @@ -727,7 +727,10 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 > *action, u32 len, > int ret; > > if (unlikely(!ct->enabled)) { > - WARN(1, "Unexpected send: action=%#x\n", *action); > + struct intel_guc *guc = ct_to_guc(ct); > + struct intel_uc *uc = container_of(guc, struct intel_uc, guc); > + > + WARN(!uc->reset_in_progress, "Unexpected send: action=%#x\n", > *action); > return -ENODEV; > } > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c > b/drivers/gpu/drm/i915/gt/uc/intel_uc.c > index b523a8521351..77c1fe2ed883 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c > @@ -550,6 +550,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc) > { > struct intel_guc *guc = &uc->guc; > > + uc->reset_in_progress = true; > > /* Nothing to do if GuC isn't supported */ > if (!intel_uc_supports_guc(uc)) > @@ -579,6 +580,8 @@ void intel_uc_reset_finish(struct intel_uc *uc) > { > struct intel_guc *guc = &uc->guc; > > + uc->reset_in_progress = false; > + > /* 
Firmware expected to be running when this function is called */ > if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc)) > intel_guc_submission_reset_finish(guc); > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h > b/drivers/gpu/drm/i915/gt/uc/intel_uc.h > index eaa3202192ac..91315e3f1c58 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h > @@ -30,6 +30,8 @@ struct intel_uc { > > /* Snapshot of GuC log from last failed load */ > struct drm_i915_gem_object *load_err_log; > + > + bool reset_in_progress; > }; > > void intel_uc_init_early(struct intel_uc *uc); > -- > 2.28.0 > > ___ > Intel-gfx mailing list > intel-...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 04/47] drm/i915/guc: Add non blocking CTB send function
On Thu, Jun 24, 2021 at 04:48:32PM +0200, Michal Wajdeczko wrote: > > > On 24.06.2021 09:04, Matthew Brost wrote: > > Add non blocking CTB send function, intel_guc_send_nb. GuC submission > > will send CTBs in the critical path and does not need to wait for these > > CTBs to complete before moving on, hence the need for this new function. > > > > The non-blocking CTB now must have a flow control mechanism to ensure > > the buffer isn't overrun. A lazy spin wait is used as we believe the > > flow control condition should be rare with a properly sized buffer. > > > > The function, intel_guc_send_nb, is exported in this patch but unused. > > Several patches later in the series make use of this function. > > > > Signed-off-by: John Harrison > > Signed-off-by: Matthew Brost > > --- > > drivers/gpu/drm/i915/gt/uc/intel_guc.h| 12 +++- > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 77 +-- > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 3 +- > > 3 files changed, 82 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > index 4abc59f6f3cd..24b1df6ad4ae 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > @@ -74,7 +74,15 @@ static inline struct intel_guc *log_to_guc(struct > > intel_guc_log *log) > > static > > inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 > > len) > > { > > - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); > > + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); > > +} > > + > > +#define INTEL_GUC_SEND_NB BIT(31) > > hmm, this flag really belongs to intel_guc_ct_send() so it should be > defined as CTB flag near that function declaration > I can move this up a few lines. 
> > +static > > +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 > > len) > > +{ > > + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, > > +INTEL_GUC_SEND_NB); > > } > > > > static inline int > > @@ -82,7 +90,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const > > u32 *action, u32 len, > >u32 *response_buf, u32 response_buf_size) > > { > > return intel_guc_ct_send(&guc->ct, action, len, > > -response_buf, response_buf_size); > > +response_buf, response_buf_size, 0); > > } > > > > static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > index a17215920e58..c9a65d05911f 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > @@ -3,6 +3,11 @@ > > * Copyright © 2016-2019 Intel Corporation > > */ > > > > +#include > > +#include > > +#include > > +#include > > + > > #include "i915_drv.h" > > #include "intel_guc_ct.h" > > #include "gt/intel_gt.h" > > @@ -373,7 +378,7 @@ static void write_barrier(struct intel_guc_ct *ct) > > static int ct_write(struct intel_guc_ct *ct, > > const u32 *action, > > u32 len /* in dwords */, > > - u32 fence) > > + u32 fence, u32 flags) > > { > > struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > > struct guc_ct_buffer_desc *desc = ctb->desc; > > @@ -421,9 +426,13 @@ static int ct_write(struct intel_guc_ct *ct, > > FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | > > FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); > > > > - hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > > - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > > -GUC_HXG_REQUEST_MSG_0_DATA0, action[0]); > > + hxg = (flags & INTEL_GUC_SEND_NB) ? 
> > + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | > > +FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | > > + GUC_HXG_EVENT_MSG_0_DATA0, action[0])) : > > + (FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > > +FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | > > + GUC_HXG_REQUEST_MSG_0_DATA0, action[0])); > > or as we already switched to accept and return whole HXG messages in > guc_send_mmio() maybe we should do the same for CTB variant too and > instead of using extra flag just let caller to prepare proper HXG header > with HXG_EVENT type and then in CTB code just look at this type to make > decision which code path to use > Not sure I follow. Anyways could this be done in a follow up by you if want this change. > note that existing callers should not be impacted, as full HXG header > for the REQUEST message looks exactly the same as "action" code alone. > > > > > CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n", > > tail, 4, &header, 4, &hxg, 4 * (len - 1), &acti
[PATCH v15 12/12] of: Add plumbing for restricted DMA pool
If a device is not behind an IOMMU, we look up the device node and set up the restricted DMA when the restricted-dma-pool is presented. Signed-off-by: Claire Chang Tested-by: Stefano Stabellini Tested-by: Will Deacon --- drivers/of/address.c| 33 + drivers/of/device.c | 3 +++ drivers/of/of_private.h | 6 ++ 3 files changed, 42 insertions(+) diff --git a/drivers/of/address.c b/drivers/of/address.c index 73ddf2540f3f..cdf700fba5c4 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -1022,6 +1023,38 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) of_node_put(node); return ret; } + +int of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) +{ + struct device_node *node, *of_node = dev->of_node; + int count, i; + + count = of_property_count_elems_of_size(of_node, "memory-region", + sizeof(u32)); + /* +* If dev->of_node doesn't exist or doesn't contain memory-region, try +* the OF node having DMA configuration. +*/ + if (count <= 0) { + of_node = np; + count = of_property_count_elems_of_size( + of_node, "memory-region", sizeof(u32)); + } + + for (i = 0; i < count; i++) { + node = of_parse_phandle(of_node, "memory-region", i); + /* +* There might be multiple memory regions, but only one +* restricted-dma-pool region is allowed. 
+*/ + if (of_device_is_compatible(node, "restricted-dma-pool") && + of_device_is_available(node)) + return of_reserved_mem_device_init_by_idx(dev, of_node, + i); + } + + return 0; +} #endif /* CONFIG_HAS_DMA */ /** diff --git a/drivers/of/device.c b/drivers/of/device.c index 6cb86de404f1..e68316836a7a 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -165,6 +165,9 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, arch_setup_dma_ops(dev, dma_start, size, iommu, coherent); + if (!iommu) + return of_dma_set_restricted_buffer(dev, np); + return 0; } EXPORT_SYMBOL_GPL(of_dma_configure_id); diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index d9e6a324de0a..25cebbed5f02 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -161,12 +161,18 @@ struct bus_dma_region; #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA) int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map); +int of_dma_set_restricted_buffer(struct device *dev, struct device_node *np); #else static inline int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) { return -ENODEV; } +static inline int of_dma_set_restricted_buffer(struct device *dev, + struct device_node *np) +{ + return -ENODEV; +} #endif #endif /* _LINUX_OF_PRIVATE_H */ -- 2.32.0.288.g62a8d224e6-goog
[PATCH v15 11/12] dt-bindings: of: Add restricted DMA pool
Introduce the new compatible string, restricted-dma-pool, for restricted DMA. One can specify the address and length of the restricted DMA memory region by restricted-dma-pool in the reserved-memory node. Signed-off-by: Claire Chang Tested-by: Stefano Stabellini Tested-by: Will Deacon --- .../reserved-memory/reserved-memory.txt | 36 +-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt index e8d3096d922c..39b5f4c5a511 100644 --- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt +++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt @@ -51,6 +51,23 @@ compatible (optional) - standard definition used as a shared pool of DMA buffers for a set of devices. It can be used by an operating system to instantiate the necessary pool management subsystem if necessary. +- restricted-dma-pool: This indicates a region of memory meant to be + used as a pool of restricted DMA buffers for a set of devices. The + memory region would be the only region accessible to those devices. + When using this, the no-map and reusable properties must not be set, + so the operating system can create a virtual mapping that will be used + for synchronization. The main purpose for restricted DMA is to + mitigate the lack of DMA access control on systems without an IOMMU, + which could result in the DMA accessing the system memory at + unexpected times and/or unexpected addresses, possibly leading to data + leakage or corruption. The feature on its own provides a basic level + of protection against the DMA overwriting buffer contents at + unexpected times. However, to protect against general data leakage and + system memory corruption, the system needs to provide way to lock down + the memory access, e.g., MPU. 
Note that since coherent allocation + needs remapping, one must set up another device coherent pool by + shared-dma-pool and use dma_alloc_from_dev_coherent instead for atomic + coherent allocation. - vendor specific string in the form ,[-] no-map (optional) - empty property - Indicates the operating system must not create a virtual mapping @@ -85,10 +102,11 @@ memory-region-names (optional) - a list of names, one for each corresponding Example --- -This example defines 3 contiguous regions are defined for Linux kernel: +This example defines 4 contiguous regions for Linux kernel: one default of all device drivers (named linux,cma@7200 and 64MiB in size), -one dedicated to the framebuffer device (named framebuffer@7800, 8MiB), and -one for multimedia processing (named multimedia-memory@7700, 64MiB). +one dedicated to the framebuffer device (named framebuffer@7800, 8MiB), +one for multimedia processing (named multimedia-memory@7700, 64MiB), and +one for restricted dma pool (named restricted_dma_reserved@0x5000, 64MiB). / { #address-cells = <1>; @@ -120,6 +138,11 @@ one for multimedia processing (named multimedia-memory@7700, 64MiB). compatible = "acme,multimedia-memory"; reg = <0x7700 0x400>; }; + + restricted_dma_reserved: restricted_dma_reserved { + compatible = "restricted-dma-pool"; + reg = <0x5000 0x400>; + }; }; /* ... */ @@ -138,4 +161,11 @@ one for multimedia processing (named multimedia-memory@7700, 64MiB). memory-region = <&multimedia_reserved>; /* ... */ }; + + pcie_device: pcie_device@0,0 { + reg = <0x8301 0x0 0x 0x0 0x0010 + 0x8301 0x0 0x0010 0x0 0x0010>; + memory-region = <&restricted_dma_reserved>; + /* ... */ + }; }; -- 2.32.0.288.g62a8d224e6-goog
[PATCH v15 10/12] swiotlb: Add restricted DMA pool initialization
Add the initialization function to create restricted DMA pools from matching reserved-memory nodes. Regardless of swiotlb setting, the restricted DMA pool is preferred if available. The restricted DMA pools provide a basic level of protection against the DMA overwriting buffer contents at unexpected times. However, to protect against general data leakage and system memory corruption, the system needs to provide a way to lock down the memory access, e.g., MPU. Signed-off-by: Claire Chang Reviewed-by: Christoph Hellwig Tested-by: Stefano Stabellini Tested-by: Will Deacon --- include/linux/swiotlb.h | 3 +- kernel/dma/Kconfig | 14 kernel/dma/swiotlb.c| 76 + 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 3b9454d1e498..39284ff2a6cd 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -73,7 +73,8 @@ extern enum swiotlb_force swiotlb_force; * range check to see if the memory was in fact allocated by this * API. * @nslabs:The number of IO TLB blocks (in groups of 64) between @start and - * @end. This is command line adjustable via setup_io_tlb_npages. + * @end. For default swiotlb, this is command line adjustable via + * setup_io_tlb_npages. * @used: The number of used IO TLB block. * @list: The free list describing the number of free entries available * from each index. diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 77b405508743..3e961dc39634 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -80,6 +80,20 @@ config SWIOTLB bool select NEED_DMA_MAP_STATE +config DMA_RESTRICTED_POOL + bool "DMA Restricted Pool" + depends on OF && OF_RESERVED_MEM + select SWIOTLB + help + This enables support for restricted DMA pools which provide a level of + DMA memory protection on systems with limited hardware protection + capabilities, such as those lacking an IOMMU. + + For more information see + + and . + If unsure, say "n". 
+ # # Should be selected if we can mmap non-coherent mappings to userspace. # The only thing that is really required is a way to set an uncached bit diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 6a7c6e30eb4b..3baf49c9b766 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -39,6 +39,13 @@ #ifdef CONFIG_DEBUG_FS #include #endif +#ifdef CONFIG_DMA_RESTRICTED_POOL +#include +#include +#include +#include +#include +#endif #include #include @@ -737,4 +744,73 @@ bool swiotlb_free(struct device *dev, struct page *page, size_t size) return true; } +static int rmem_swiotlb_device_init(struct reserved_mem *rmem, + struct device *dev) +{ + struct io_tlb_mem *mem = rmem->priv; + unsigned long nslabs = rmem->size >> IO_TLB_SHIFT; + + /* +* Since multiple devices can share the same pool, the private data, +* io_tlb_mem struct, will be initialized by the first device attached +* to it. +*/ + if (!mem) { + mem = kzalloc(struct_size(mem, slots, nslabs), GFP_KERNEL); + if (!mem) + return -ENOMEM; + + set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), +rmem->size >> PAGE_SHIFT); + swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false); + mem->force_bounce = true; + mem->for_alloc = true; + + rmem->priv = mem; + + if (IS_ENABLED(CONFIG_DEBUG_FS)) { + mem->debugfs = + debugfs_create_dir(rmem->name, debugfs_dir); + swiotlb_create_debugfs_files(mem); + } + } + + dev->dma_io_tlb_mem = mem; + + return 0; +} + +static void rmem_swiotlb_device_release(struct reserved_mem *rmem, + struct device *dev) +{ + dev->dma_io_tlb_mem = io_tlb_default_mem; +} + +static const struct reserved_mem_ops rmem_swiotlb_ops = { + .device_init = rmem_swiotlb_device_init, + .device_release = rmem_swiotlb_device_release, +}; + +static int __init rmem_swiotlb_setup(struct reserved_mem *rmem) +{ + unsigned long node = rmem->fdt_node; + + if (of_get_flat_dt_prop(node, "reusable", NULL) || + of_get_flat_dt_prop(node, "linux,cma-default", NULL) || + of_get_flat_dt_prop(node, 
"linux,dma-default", NULL) || + of_get_flat_dt_prop(node, "no-map", NULL)) + return -EINVAL; + + if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base { + pr_err("Restricted DMA pool must be accessible within the linear mapping."); + return -EINVAL
[PATCH v15 09/12] swiotlb: Add restricted DMA alloc/free support
Add the functions, swiotlb_{alloc,free} and is_swiotlb_for_alloc to support the memory allocation from restricted DMA pool. The restricted DMA pool is preferred if available. Note that since coherent allocation needs remapping, one must set up another device coherent pool by shared-dma-pool and use dma_alloc_from_dev_coherent instead for atomic coherent allocation. Signed-off-by: Claire Chang Reviewed-by: Christoph Hellwig Tested-by: Stefano Stabellini Tested-by: Will Deacon Acked-by: Stefano Stabellini --- include/linux/swiotlb.h | 26 ++ kernel/dma/direct.c | 49 +++-- kernel/dma/swiotlb.c| 38 ++-- 3 files changed, 99 insertions(+), 14 deletions(-) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index da348671b0d5..3b9454d1e498 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -85,6 +85,7 @@ extern enum swiotlb_force swiotlb_force; * @debugfs: The dentry to debugfs. * @late_alloc:%true if allocated using the page allocator * @force_bounce: %true if swiotlb bouncing is forced + * @for_alloc: %true if the pool is used for memory allocation */ struct io_tlb_mem { phys_addr_t start; @@ -96,6 +97,7 @@ struct io_tlb_mem { struct dentry *debugfs; bool late_alloc; bool force_bounce; + bool for_alloc; struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; @@ -158,4 +160,28 @@ static inline void swiotlb_adjust_size(unsigned long size) extern void swiotlb_print_info(void); extern void swiotlb_set_max_segment(unsigned int); +#ifdef CONFIG_DMA_RESTRICTED_POOL +struct page *swiotlb_alloc(struct device *dev, size_t size); +bool swiotlb_free(struct device *dev, struct page *page, size_t size); + +static inline bool is_swiotlb_for_alloc(struct device *dev) +{ + return dev->dma_io_tlb_mem->for_alloc; +} +#else +static inline struct page *swiotlb_alloc(struct device *dev, size_t size) +{ + return NULL; +} +static inline bool swiotlb_free(struct device *dev, struct page *page, + size_t size) +{ + return false; +} +static inline bool 
is_swiotlb_for_alloc(struct device *dev) +{ + return false; +} +#endif /* CONFIG_DMA_RESTRICTED_POOL */ + #endif /* __LINUX_SWIOTLB_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index a92465b4eb12..2de33e5d302b 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -75,6 +75,15 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } +static void __dma_direct_free_pages(struct device *dev, struct page *page, + size_t size) +{ + if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) && + swiotlb_free(dev, page, size)) + return; + dma_free_contiguous(dev, page, size); +} + static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp) { @@ -86,6 +95,16 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_limit); + if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) && + is_swiotlb_for_alloc(dev)) { + page = swiotlb_alloc(dev, size); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + __dma_direct_free_pages(dev, page, size); + return NULL; + } + return page; + } + page = dma_alloc_contiguous(dev, size, gfp); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { dma_free_contiguous(dev, page, size); @@ -142,7 +161,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, gfp |= __GFP_NOWARN; if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); if (!page) return NULL; @@ -155,18 +174,23 @@ void *dma_direct_alloc(struct device *dev, size_t size, } if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !dev_is_dma_coherent(dev)) + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) && + !is_swiotlb_for_alloc(dev)) return 
arch_dma_alloc(dev, size, dma_handle, gfp, attrs); /* * Remapping or decrypting memory may block. If either is required and * we can't block, allocate the memory from the atomic pools. +* If restricted DMA (i.e., is_swiotlb_for_alloc) i
[PATCH v15 07/12] swiotlb: Move alloc_size to swiotlb_find_slots
Rename find_slots to swiotlb_find_slots and move the maintenance of alloc_size to it for better code reusability later. Signed-off-by: Claire Chang Reviewed-by: Christoph Hellwig Tested-by: Stefano Stabellini Tested-by: Will Deacon --- kernel/dma/swiotlb.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 0d294bbf274c..b41d16e92cf6 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -432,8 +432,8 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index) * Find a suitable number of IO TLB entries size that will fit this request and * allocate a buffer from that IO TLB pool. */ -static int find_slots(struct device *dev, phys_addr_t orig_addr, - size_t alloc_size) +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, + size_t alloc_size) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned long boundary_mask = dma_get_seg_boundary(dev); @@ -444,6 +444,7 @@ static int find_slots(struct device *dev, phys_addr_t orig_addr, dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); unsigned int nslots = nr_slots(alloc_size), stride; unsigned int index, wrap, count = 0, i; + unsigned int offset = swiotlb_align_offset(dev, orig_addr); unsigned long flags; BUG_ON(!nslots); @@ -488,8 +489,11 @@ static int find_slots(struct device *dev, phys_addr_t orig_addr, return -1; found: - for (i = index; i < index + nslots; i++) + for (i = index; i < index + nslots; i++) { mem->slots[i].list = 0; + mem->slots[i].alloc_size = + alloc_size - (offset + ((i - index) << IO_TLB_SHIFT)); + } for (i = index - 1; io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list; i--) @@ -530,7 +534,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, return (phys_addr_t)DMA_MAPPING_ERROR; } - index = find_slots(dev, orig_addr, alloc_size + offset); + index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset); if (index == -1) { if (!(attrs & 
DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, @@ -544,11 +548,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * This is needed when we sync the memory. Then we sync the buffer if * needed. */ - for (i = 0; i < nr_slots(alloc_size + offset); i++) { + for (i = 0; i < nr_slots(alloc_size + offset); i++) mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); - mem->slots[index + i].alloc_size = - alloc_size - (i << IO_TLB_SHIFT); - } tlb_addr = slot_addr(mem->start, index) + offset; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) -- 2.32.0.288.g62a8d224e6-goog