Re: [RFC v2 05/12] drm/i915/svm: Page table mirroring support

2019-12-23 Thread Jason Gunthorpe
On Wed, Dec 18, 2019 at 02:41:47PM -0800, Niranjana Vishwanathapura wrote:
> > > +static u32 i915_svm_build_sg(struct i915_address_space *vm,
> > > +  struct hmm_range *range,
> > > +  struct sg_table *st)
> > > +{
> > > + struct scatterlist *sg;
> > > + u32 sg_page_sizes = 0;
> > > + u64 i, npages;
> > > +
> > > + sg = NULL;
> > > + st->nents = 0;
> > > + npages = (range->end - range->start) / PAGE_SIZE;
> > > +
> > > + /*
> > > +  * No need to dma map the host pages and later unmap it, as
> > > +  * GPU is not allowed to access it with SVM.
> > > +  * XXX: Need to dma map host pages for integrated graphics while
> > > +  * extending SVM support there.
> > > +  */
> > > + for (i = 0; i < npages; i++) {
> > > + u64 addr = range->pfns[i] & ~((1UL << range->pfn_shift) - 1);
> > > +
> > > + if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
> > > + sg->length += PAGE_SIZE;
> > > + sg_dma_len(sg) += PAGE_SIZE;
> > > + continue;
> > > + }
> > > +
> > > + if (sg)
> > > + sg_page_sizes |= sg->length;
> > > +
> > > + sg = sg ? __sg_next(sg) : st->sgl;
> > > + sg_dma_address(sg) = addr;
> > > + sg_dma_len(sg) = PAGE_SIZE;
> > 
> > This still can't be like this - assigning pfn to 'dma_address' is
> > fundamentally wrong.
> > 
> > Whatever explanation you had, this needs to be fixed up first before we get
> > to this patch.
> > 
> 
> The pfn is converted into a device address which goes into sg_dma_address.
> Ok, let me think about what else we can do here.

If you combine this with the other function and make it so only
DEVICE_PRIVATE pages get converted to a dma_address without dma_map,
then that would make sense.
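
Roughly, a minimal sketch of that shape (i915_dpa() is an assumed helper
for the device-private-page-to-GPU-address conversion, not an existing
function; hmm_device_entry_to_page() and is_device_private_page() are the
existing kernel helpers):

static int i915_svm_map_page(struct device *dev, struct hmm_range *range,
			     u64 entry, dma_addr_t *addr)
{
	struct page *page = hmm_device_entry_to_page(range, entry);

	if (!page)
		return -EFAULT;

	if (is_device_private_page(page)) {
		/* GPU-local memory: there is no host mapping to dma map. */
		*addr = i915_dpa(page);	/* assumed helper */
		return 0;
	}

	/* Ordinary host page: must go through the DMA API. */
	*addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
	return dma_mapping_error(dev, *addr) ? -ENOMEM : 0;
}

The coalescing loop above would then consume *addr instead of the raw pfn.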

> > > +static int
> > > +i915_svm_invalidate_range_start(struct mmu_notifier *mn,
> > > + const struct mmu_notifier_range *update)
> > > +{
> > > + struct i915_svm *svm = container_of(mn, struct i915_svm, notifier);
> > > + unsigned long length = update->end - update->start;
> > > +
> > > + DRM_DEBUG_DRIVER("start 0x%lx length 0x%lx\n", update->start, length);
> > > + if (!mmu_notifier_range_blockable(update))
> > > + return -EAGAIN;
> > > +
> > > + i915_gem_vm_unbind_svm_buffer(svm->vm, update->start, length);
> > > + return 0;
> > > +}
> > 
> > I still think you should strive for a better design than putting a
> > notifier across the entire address space..
> > 
> 
> Yah, thought it could be a later optimization.
> If I think about it, it has to be a new user API to set the range,
> or an intermediate data structure for tracking the bound ranges.
> Will look into it.

Well, there are lots of options. Like I said, implicit ODP uses a
level of the device page table to attach the notifier.

There are many performance trade-offs here; it depends what works best
for your workload, I suppose. But usually the fault path is the one
that has to be fast, so I would think to avoid registering
mmu_intervals on it and accept the higher probability of collisions.
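
As one illustration of the per-range alternative (svm_bind_range and the
callback name are made up for the sketch; mmu_interval_notifier_insert()
and mmu_interval_set_seq() are the real APIs), VM_BIND could register one
interval notifier per bound range:

static bool i915_svm_range_invalidate(struct mmu_interval_notifier *mni,
				      const struct mmu_notifier_range *range,
				      unsigned long cur_seq)
{
	struct svm_bind_range *r = container_of(mni, typeof(*r), notifier);

	if (!mmu_notifier_range_blockable(range))
		return false;

	mmu_interval_set_seq(mni, cur_seq);
	i915_gem_vm_unbind_svm_buffer(r->vm, range->start,
				      range->end - range->start);
	return true;
}

static const struct mmu_interval_notifier_ops i915_svm_range_ops = {
	.invalidate = i915_svm_range_invalidate,
};

/* At VM_BIND time, cover only the bound range: */
ret = mmu_interval_notifier_insert(&r->notifier, current->mm,
				   args->start, args->length,
				   &i915_svm_range_ops);

The trade-off is extra insert/remove work on every bind/unbind in
exchange for fewer spurious invalidations hitting the whole address
space.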

Jason


Re: [RFC v2 05/12] drm/i915/svm: Page table mirroring support

2019-12-22 Thread Niranjana Vishwanathapura

On Fri, Dec 20, 2019 at 01:45:33PM +, Jason Gunthorpe wrote:

On Wed, Dec 18, 2019 at 02:41:47PM -0800, Niranjana Vishwanathapura wrote:

> > +static u32 i915_svm_build_sg(struct i915_address_space *vm,
> > +  struct hmm_range *range,
> > +  struct sg_table *st)
> > +{
> > + struct scatterlist *sg;
> > + u32 sg_page_sizes = 0;
> > + u64 i, npages;
> > +
> > + sg = NULL;
> > + st->nents = 0;
> > + npages = (range->end - range->start) / PAGE_SIZE;
> > +
> > + /*
> > +  * No need to dma map the host pages and later unmap it, as
> > +  * GPU is not allowed to access it with SVM.
> > +  * XXX: Need to dma map host pages for integrated graphics while
> > +  * extending SVM support there.
> > +  */
> > + for (i = 0; i < npages; i++) {
> > + u64 addr = range->pfns[i] & ~((1UL << range->pfn_shift) - 1);
> > +
> > + if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
> > + sg->length += PAGE_SIZE;
> > + sg_dma_len(sg) += PAGE_SIZE;
> > + continue;
> > + }
> > +
> > + if (sg)
> > + sg_page_sizes |= sg->length;
> > +
> > + sg = sg ? __sg_next(sg) : st->sgl;
> > + sg_dma_address(sg) = addr;
> > + sg_dma_len(sg) = PAGE_SIZE;
>
> This still can't be like this - assigning pfn to 'dma_address' is
> fundamentally wrong.
>
> Whatever explanation you had, this needs to be fixed up first before we get
> to this patch.
>

The pfn is converted into a device address which goes into sg_dma_address.
Ok, let me think about what else we can do here.


If you combine this with the other function and make it so only
DEVICE_PRIVATE pages get converted to a dma_address without dma_map,
then that would make sense.



Ok thanks, will do that.


> > +static int
> > +i915_svm_invalidate_range_start(struct mmu_notifier *mn,
> > + const struct mmu_notifier_range *update)
> > +{
> > + struct i915_svm *svm = container_of(mn, struct i915_svm, notifier);
> > + unsigned long length = update->end - update->start;
> > +
> > + DRM_DEBUG_DRIVER("start 0x%lx length 0x%lx\n", update->start, length);
> > + if (!mmu_notifier_range_blockable(update))
> > + return -EAGAIN;
> > +
> > + i915_gem_vm_unbind_svm_buffer(svm->vm, update->start, length);
> > + return 0;
> > +}
>
> I still think you should strive for a better design than putting a
> notifier across the entire address space..
>

Yah, thought it could be a later optimization.
If I think about it, it has to be a new user API to set the range,
or an intermediate data structure for tracking the bound ranges.
Will look into it.


Well, there are lots of options. Like I said, implicit ODP uses a
level of the device page table to attach the notifier.

There are many performance trade-offs here; it depends what works best
for your workload, I suppose. But usually the fault path is the one
that has to be fast, so I would think to avoid registering
mmu_intervals on it and accept the higher probability of collisions.



Ok thanks, yah, the solution should be tuned for the performant path. Will look into it.

Thanks,
Niranjana


Jason



Re: [RFC v2 05/12] drm/i915/svm: Page table mirroring support

2019-12-18 Thread Niranjana Vishwanathapura

On Tue, Dec 17, 2019 at 08:31:07PM +, Jason Gunthorpe wrote:

On Fri, Dec 13, 2019 at 01:56:07PM -0800, Niranjana Vishwanathapura wrote:

+static struct i915_svm *vm_get_svm(struct i915_address_space *vm)
+{
+   struct i915_svm *svm = vm->svm;
+
+   mutex_lock(&vm->svm_mutex);
+   if (svm && !kref_get_unless_zero(&svm->ref))
+   svm = NULL;


Quite strange to see a get_unless_zero under a lock..



Thanks.

Yah I agree (construct taken from a different place).
It should go away once I switch to mmu_notifier_get/put.


+static void release_svm(struct kref *ref)
+{
+   struct i915_svm *svm = container_of(ref, typeof(*svm), ref);
+   struct i915_address_space *vm = svm->vm;
+
+   mmu_notifier_unregister(&svm->notifier, svm->notifier.mm);


This would be a lot better to use mmu_notifier_put to manage the
reference and deallocation.



Yah, have that in mind, will use that.
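
For reference, the lifecycle with get/put looks roughly like this (the
alloc/free callback bodies are a sketch; mmu_notifier_get(),
mmu_notifier_put() and the .alloc_notifier/.free_notifier hooks are the
real API):

static struct mmu_notifier *i915_svm_alloc_notifier(struct mm_struct *mm)
{
	struct i915_svm *svm = kzalloc(sizeof(*svm), GFP_KERNEL);

	return svm ? &svm->notifier : ERR_PTR(-ENOMEM);
}

static void i915_svm_free_notifier(struct mmu_notifier *mn)
{
	kfree(container_of(mn, struct i915_svm, notifier));
}

static const struct mmu_notifier_ops i915_svm_ops = {
	.invalidate_range_start = i915_svm_invalidate_range_start,
	.alloc_notifier = i915_svm_alloc_notifier,
	.free_notifier = i915_svm_free_notifier,
};

/* mmu_notifier_get() returns the existing registration for this mm or
 * allocates one via .alloc_notifier; mmu_notifier_put() drops the
 * reference and frees via .free_notifier after an SRCU grace period,
 * replacing the manual kref + mmu_notifier_unregister() dance. */
mn = mmu_notifier_get(&i915_svm_ops, current->mm);
...
mmu_notifier_put(mn);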


+static u32 i915_svm_build_sg(struct i915_address_space *vm,
+struct hmm_range *range,
+struct sg_table *st)
+{
+   struct scatterlist *sg;
+   u32 sg_page_sizes = 0;
+   u64 i, npages;
+
+   sg = NULL;
+   st->nents = 0;
+   npages = (range->end - range->start) / PAGE_SIZE;
+
+   /*
+* No need to dma map the host pages and later unmap it, as
+* GPU is not allowed to access it with SVM.
+* XXX: Need to dma map host pages for integrated graphics while
+* extending SVM support there.
+*/
+   for (i = 0; i < npages; i++) {
+   u64 addr = range->pfns[i] & ~((1UL << range->pfn_shift) - 1);
+
+   if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
+   sg->length += PAGE_SIZE;
+   sg_dma_len(sg) += PAGE_SIZE;
+   continue;
+   }
+
+   if (sg)
+   sg_page_sizes |= sg->length;
+
+   sg = sg ? __sg_next(sg) : st->sgl;
+   sg_dma_address(sg) = addr;
+   sg_dma_len(sg) = PAGE_SIZE;


This still can't be like this - assigning pfn to 'dma_address' is
fundamentally wrong.

Whatever explanation you had, this needs to be fixed up first before we get
to this patch.



The pfn is converted into a device address which goes into sg_dma_address.
Ok, let me think about what else we can do here.
As device addresses are not dma mapped, may be we can look at it as direct
mapped (__phys_to_dma())? Or perhaps define our own sgl.
Not sure, will look into it.


+static int i915_range_fault(struct svm_notifier *sn,
+   struct drm_i915_gem_vm_bind *args,
+   struct sg_table *st, u64 *pfns)
+{
+   unsigned long timeout =
+   jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+   /* Have HMM fault pages within the fault window to the GPU. */
+   struct hmm_range range = {
+   .notifier = &sn->notifier,
+   .start = sn->notifier.interval_tree.start,
+   .end = sn->notifier.interval_tree.last + 1,
+   .pfns = pfns,
+   .pfn_shift = PAGE_SHIFT,
+   .flags = i915_range_flags,
+   .values = i915_range_values,
+   .default_flags = (range.flags[HMM_PFN_VALID] |
+ ((args->flags & I915_GEM_VM_BIND_READONLY) ?
+  0 : range.flags[HMM_PFN_WRITE])),
+   .pfn_flags_mask = 0,
+
+   };
+   struct i915_svm *svm = sn->svm;
+   struct mm_struct *mm = sn->notifier.mm;
+   struct i915_address_space *vm = svm->vm;
+   u32 sg_page_sizes;
+   u64 flags;
+   long ret;
+
+   while (true) {
+   if (time_after(jiffies, timeout))
+   return -EBUSY;
+
+   range.notifier_seq = mmu_interval_read_begin(range.notifier);
+   down_read(&mm->mmap_sem);
+   ret = hmm_range_fault(&range, 0);
+   up_read(&mm->mmap_sem);
+   if (ret <= 0) {
+   if (ret == 0 || ret == -EBUSY)
+   continue;
+   return ret;
+   }
+
+   sg_page_sizes = i915_svm_build_sg(vm, &range, st);
+   mutex_lock(&svm->mutex);
+   if (mmu_interval_read_retry(range.notifier,
+   range.notifier_seq)) {
+   mutex_unlock(&svm->mutex);
+   continue;
+   }
+   break;
+   }
+
+   flags = (args->flags & I915_GEM_VM_BIND_READONLY) ?
+   I915_GTT_SVM_READONLY : 0;
+   ret = svm_bind_addr_commit(vm, args->start, args->length,
+  flags, st, sg_page_sizes);
+   mutex_unlock(&svm->mutex);


This looks better..


+int i915_gem_vm_bind_svm_buffer(struct i915_address_space *vm,
+   struct drm_i915_gem_vm_bind *args)

Re: [RFC v2 05/12] drm/i915/svm: Page table mirroring support

2019-12-18 Thread Jason Gunthorpe
On Fri, Dec 13, 2019 at 01:56:07PM -0800, Niranjana Vishwanathapura wrote:
> +static struct i915_svm *vm_get_svm(struct i915_address_space *vm)
> +{
> + struct i915_svm *svm = vm->svm;
> +
> + mutex_lock(&vm->svm_mutex);
> + if (svm && !kref_get_unless_zero(&svm->ref))
> + svm = NULL;

Quite strange to see a get_unless_zero under a lock..

> +static void release_svm(struct kref *ref)
> +{
> + struct i915_svm *svm = container_of(ref, typeof(*svm), ref);
> + struct i915_address_space *vm = svm->vm;
> +
> + mmu_notifier_unregister(&svm->notifier, svm->notifier.mm);

This would be a lot better to use mmu_notifier_put to manage the
reference and deallocation.

> +static u32 i915_svm_build_sg(struct i915_address_space *vm,
> +  struct hmm_range *range,
> +  struct sg_table *st)
> +{
> + struct scatterlist *sg;
> + u32 sg_page_sizes = 0;
> + u64 i, npages;
> +
> + sg = NULL;
> + st->nents = 0;
> + npages = (range->end - range->start) / PAGE_SIZE;
> +
> + /*
> +  * No need to dma map the host pages and later unmap it, as
> +  * GPU is not allowed to access it with SVM.
> +  * XXX: Need to dma map host pages for integrated graphics while
> +  * extending SVM support there.
> +  */
> + for (i = 0; i < npages; i++) {
> + u64 addr = range->pfns[i] & ~((1UL << range->pfn_shift) - 1);
> +
> + if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
> + sg->length += PAGE_SIZE;
> + sg_dma_len(sg) += PAGE_SIZE;
> + continue;
> + }
> +
> + if (sg)
> + sg_page_sizes |= sg->length;
> +
> + sg = sg ? __sg_next(sg) : st->sgl;
> + sg_dma_address(sg) = addr;
> + sg_dma_len(sg) = PAGE_SIZE;

This still can't be like this - assigning pfn to 'dma_address' is
fundamentally wrong.

Whatever explanation you had, this needs to be fixed up first before we get
to this patch.

> +static int i915_range_fault(struct svm_notifier *sn,
> + struct drm_i915_gem_vm_bind *args,
> + struct sg_table *st, u64 *pfns)
> +{
> + unsigned long timeout =
> + jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
> + /* Have HMM fault pages within the fault window to the GPU. */
> + struct hmm_range range = {
> + .notifier = &sn->notifier,
> + .start = sn->notifier.interval_tree.start,
> + .end = sn->notifier.interval_tree.last + 1,
> + .pfns = pfns,
> + .pfn_shift = PAGE_SHIFT,
> + .flags = i915_range_flags,
> + .values = i915_range_values,
> + .default_flags = (range.flags[HMM_PFN_VALID] |
> +   ((args->flags & I915_GEM_VM_BIND_READONLY) ?
> +0 : range.flags[HMM_PFN_WRITE])),
> + .pfn_flags_mask = 0,
> +
> + };
> + struct i915_svm *svm = sn->svm;
> + struct mm_struct *mm = sn->notifier.mm;
> + struct i915_address_space *vm = svm->vm;
> + u32 sg_page_sizes;
> + u64 flags;
> + long ret;
> +
> + while (true) {
> + if (time_after(jiffies, timeout))
> + return -EBUSY;
> +
> + range.notifier_seq = mmu_interval_read_begin(range.notifier);
> + down_read(&mm->mmap_sem);
> + ret = hmm_range_fault(&range, 0);
> + up_read(&mm->mmap_sem);
> + if (ret <= 0) {
> + if (ret == 0 || ret == -EBUSY)
> + continue;
> + return ret;
> + }
> +
> + sg_page_sizes = i915_svm_build_sg(vm, &range, st);
> + mutex_lock(&svm->mutex);
> + if (mmu_interval_read_retry(range.notifier,
> + range.notifier_seq)) {
> + mutex_unlock(&svm->mutex);
> + continue;
> + }
> + break;
> + }
> +
> + flags = (args->flags & I915_GEM_VM_BIND_READONLY) ?
> + I915_GTT_SVM_READONLY : 0;
> + ret = svm_bind_addr_commit(vm, args->start, args->length,
> +flags, st, sg_page_sizes);
> + mutex_unlock(&svm->mutex);

This looks better..

> +int i915_gem_vm_bind_svm_buffer(struct i915_address_space *vm,
> + struct drm_i915_gem_vm_bind *args)
> +{
> + struct svm_notifier sn;
> + struct i915_svm *svm;
> + struct mm_struct *mm;
> + struct sg_table st;
> + u64 npages, *pfns;
> + int ret = 0;
> +
> + svm = vm_get_svm(vm);
> + if (!svm)
> + return -EINVAL;
> +
> + mm = svm->notifier.mm;
> + if (mm != current->mm) {
> + ret = -EPERM;
> + goto bind_done;
> + }

Bit strange, we have APIs now to make it fairly easy to deal with
multiple processes,

[RFC v2 05/12] drm/i915/svm: Page table mirroring support

2019-12-13 Thread Niranjana Vishwanathapura
Use HMM page table mirroring support to build device page table.
Implement the bind ioctl and bind the process address range in the
specified context's ppgtt.
Handle invalidation notifications by unbinding the address range.
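
As a rough illustration only (the ioctl name and the vm_id field come
from earlier patches in this series and are assumed here), userspace
drives the bind like:

struct drm_i915_gem_vm_bind bind = {
	.vm_id = vm_id,			/* assumed field */
	.start = (__u64)(uintptr_t)buf,	/* plain malloc'ed memory */
	.length = size,
	.type = I915_GEM_VM_BIND_SVM_BUFFER,
	.flags = read_only ? I915_GEM_VM_BIND_READONLY : 0,
};

ioctl(fd, DRM_IOCTL_I915_GEM_VM_BIND, &bind);	/* assumed ioctl name */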

Cc: Joonas Lahtinen 
Cc: Jon Bloomfield 
Cc: Daniel Vetter 
Cc: Sudeep Dutt 
Signed-off-by: Niranjana Vishwanathapura 
---
 drivers/gpu/drm/i915/Kconfig|   3 +
 drivers/gpu/drm/i915/Makefile   |   3 +-
 drivers/gpu/drm/i915/i915_drv.c |   5 +
 drivers/gpu/drm/i915/i915_gem_gtt.c |   5 +
 drivers/gpu/drm/i915/i915_gem_gtt.h |   4 +
 drivers/gpu/drm/i915/i915_svm.c | 324 
 drivers/gpu/drm/i915/i915_svm.h |  50 +
 include/uapi/drm/i915_drm.h |   9 +-
 8 files changed, 401 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_svm.c
 create mode 100644 drivers/gpu/drm/i915/i915_svm.h

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index c2e48710eec8..689e57fe3973 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -141,6 +141,9 @@ config DRM_I915_SVM
bool "Enable Shared Virtual Memory support in i915"
depends on STAGING
depends on DRM_I915
+   depends on MMU
+   select HMM_MIRROR
+   select MMU_NOTIFIER
default n
help
  Choose this option if you want Shared Virtual Memory (SVM)
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 75fe45633779..7d4cd9eefd12 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -154,7 +154,8 @@ i915-y += \
  intel_wopcm.o
 
 # SVM code
-i915-$(CONFIG_DRM_I915_SVM) += gem/i915_gem_svm.o
+i915-$(CONFIG_DRM_I915_SVM) += gem/i915_gem_svm.o \
+  i915_svm.o
 
 # general-purpose microcontroller (GuC) support
 obj-y += gt/uc/
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d452ea8e40b3..866d3cbb1edf 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -62,6 +62,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
 #include "gem/i915_gem_mman.h"
+#include "gem/i915_gem_svm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_rc6.h"
@@ -73,6 +74,7 @@
 #include "i915_perf.h"
 #include "i915_query.h"
 #include "i915_suspend.h"
+#include "i915_svm.h"
 #include "i915_switcheroo.h"
 #include "i915_sysfs.h"
 #include "i915_trace.h"
@@ -2694,6 +2696,9 @@ static int i915_gem_vm_bind_ioctl(struct drm_device *dev, void *data,
switch (args->type) {
case I915_GEM_VM_BIND_SVM_OBJ:
ret = i915_gem_vm_bind_svm_obj(vm, args, file);
+   break;
+   case I915_GEM_VM_BIND_SVM_BUFFER:
+   ret = i915_gem_vm_bind_svm_buffer(vm, args);
}
 
i915_vm_put(vm);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6657ff41dc3f..192674f03e4e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -42,6 +42,7 @@
 
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
+#include "i915_svm.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
@@ -562,6 +563,7 @@ static void i915_address_space_fini(struct i915_address_space *vm)
drm_mm_takedown(&vm->mm);
 
mutex_destroy(&vm->mutex);
+   mutex_destroy(&vm->svm_mutex);
 }
 
 void __i915_vm_close(struct i915_address_space *vm)
@@ -591,6 +593,7 @@ static void __i915_vm_release(struct work_struct *work)
struct i915_address_space *vm =
container_of(work, struct i915_address_space, rcu.work);
 
+   i915_svm_unbind_mm(vm);
vm->cleanup(vm);
i915_address_space_fini(vm);
 
@@ -620,6 +623,7 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 * attempt holding the lock is immediately reported by lockdep.
 */
mutex_init(&vm->mutex);
+   mutex_init(&vm->svm_mutex);
lockdep_set_subclass(&vm->mutex, subclass);
i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
 
@@ -631,6 +635,7 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
INIT_LIST_HEAD(&vm->bound_list);
INIT_LIST_HEAD(&vm->svm_list);
+   RCU_INIT_POINTER(vm->svm, NULL);
 }
 
 static int __setup_page_dma(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8a8a314e1295..e06e6447e0d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -293,6 +293,8 @@ struct i915_svm_obj {
u64 offset;
 };
 
+struct i915_svm;
+
 struct i915_address_space {
struct kref ref;
struct rcu_work rcu;
@@ -342,6 +344,8 @@ struct i915_address_space {
 */
struct list_head svm_list;
unsigned int svm_count;
+   struct i915_svm *svm;
+   struct mutex svm_mutex;