[Intel-gfx] ✗ Fi.CI.BUILD: failure for Dynamicaly allocate SG table from the pages (rev3)

2020-10-04 Thread Patchwork
== Series Details ==

Series: Dynamicaly allocate SG table from the pages (rev3)
URL   : https://patchwork.freedesktop.org/series/81962/
State : failure

== Summary ==

Applying: This series extends __sg_alloc_table_from_pages to allow chaining of
error: sha1 information is lacking or useless 
(tools/testing/scatterlist/main.c).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 This series extends __sg_alloc_table_from_pages to allow 
chaining of
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH rdma-next v5 0/4] Dynamicaly allocate SG table from the pages

2020-10-04 Thread Leon Romanovsky
From: Leon Romanovsky 

Changelog:
v5:
 * Use sg_init_table to allocate table and avoid changes is __sg_alloc_table
 * Fix offset issue
v4: https://lore.kernel.org/lkml/20200927064647.3106737-1-l...@kernel.org
 * Fixed formatting in first patch.
 * Added fix (clear tmp_netnts) in first patch to fix i915 failure.
 * Added test patches
v3: https://lore.kernel.org/linux-rdma/20200922083958.2150803-1-l...@kernel.org/
 * Squashed Christopher's suggestion to avoid introduced new API, but extend 
existing one.
v2: https://lore.kernel.org/linux-rdma/20200916140726.839377-1-l...@kernel.org
 * Fixed indentations and comments
 * Deleted sg_alloc_next()
 * Squashed lib/scatterlist patches into one
v1: https://lore.kernel.org/lkml/20200910134259.1304543-1-l...@kernel.org
 * Changed _sg_chain to be __sg_chain
 * Added dependency on ARCH_NO_SG_CHAIN
 * Removed struct sg_append
v0:
 * https://lore.kernel.org/lkml/20200903121853.1145976-1-l...@kernel.org

--
>From Maor:

This series extends __sg_alloc_table_from_pages to allow chaining of
new pages to already initialized SG table.

This allows for the drivers to utilize the optimization of merging contiguous
pages without a need to pre allocate all the pages and hold them in
a very large temporary buffer prior to the call to SG table initialization.

The second patch changes the Infiniband driver to use the new API. It
removes duplicate functionality from the code and benefits the
optimization of allocating dynamic SG table from pages.

In huge pages system of 2MB page size, without this change, the SG table
would contain x512 SG entries.
E.g. for 100GB memory registration:

 Number of entries  Size
Before26214400  600.0MB
After512001.2MB

Thanks

Maor Gottlieb (2):
  lib/scatterlist: Add support in dynamic allocation of SG table from
pages
  RDMA/umem: Move to allocate SG table from pages

Tvrtko Ursulin (2):
  tools/testing/scatterlist: Rejuvenate bit-rotten test
  tools/testing/scatterlist: Show errors in human readable form

 drivers/gpu/drm/i915/gem/i915_gem_userptr.c |  12 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c  |  15 ++-
 drivers/infiniband/core/umem.c  |  94 ++-
 include/linux/scatterlist.h |  38 +++---
 lib/scatterlist.c   | 125 
 tools/testing/scatterlist/Makefile  |   3 +-
 tools/testing/scatterlist/linux/mm.h|  35 ++
 tools/testing/scatterlist/main.c|  53 ++---
 8 files changed, 225 insertions(+), 150 deletions(-)

--
2.26.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH rdma-next v5 3/4] tools/testing/scatterlist: Show errors in human readable form

2020-10-04 Thread Leon Romanovsky
From: Tvrtko Ursulin 

Instead of just asserting dump some more useful info about what the test
saw versus what it expected to see.

Signed-off-by: Tvrtko Ursulin 
Cc: Maor Gottlieb 
Signed-off-by: Leon Romanovsky 
---
 tools/testing/scatterlist/main.c | 44 
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index 4899359a31ac..b2c7e9f7b8d3 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -5,6 +5,15 @@

 #define MAX_PAGES (64)

+struct test {
+   int alloc_ret;
+   unsigned num_pages;
+   unsigned *pfn;
+   unsigned size;
+   unsigned int max_seg;
+   unsigned int expected_segments;
+};
+
 static void set_pages(struct page **pages, const unsigned *array, unsigned num)
 {
unsigned int i;
@@ -17,17 +26,32 @@ static void set_pages(struct page **pages, const unsigned 
*array, unsigned num)

 #define pfn(...) (unsigned []){ __VA_ARGS__ }

+static void fail(struct test *test, struct sg_table *st, const char *cond)
+{
+   unsigned int i;
+
+   fprintf(stderr, "Failed on '%s'!\n\n", cond);
+
+   printf("size = %u, max segment = %u, expected nents = %u\nst->nents = 
%u, st->orig_nents= %u\n",
+  test->size, test->max_seg, test->expected_segments, st->nents,
+  st->orig_nents);
+
+   printf("%u input PFNs:", test->num_pages);
+   for (i = 0; i < test->num_pages; i++)
+   printf(" %x", test->pfn[i]);
+   printf("\n");
+
+   exit(1);
+}
+
+#define VALIDATE(cond, st, test) \
+   if (!(cond)) \
+   fail((test), (st), #cond);
+
 int main(void)
 {
const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
-   struct test {
-   int alloc_ret;
-   unsigned num_pages;
-   unsigned *pfn;
-   unsigned size;
-   unsigned int max_seg;
-   unsigned int expected_segments;
-   } *test, tests[] = {
+   struct test *test, tests[] = {
{ -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
{ -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
{ -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
@@ -66,8 +90,8 @@ int main(void)
if (test->alloc_ret)
continue;

-   assert(st.nents == test->expected_segments);
-   assert(st.orig_nents == test->expected_segments);
+   VALIDATE(st.nents == test->expected_segments, &st, test);
+   VALIDATE(st.orig_nents == test->expected_segments, &st, test);

sg_free_table(&st);
}
--
2.26.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH rdma-next v5 1/4] lib/scatterlist: Add support in dynamic allocation of SG table from pages

2020-10-04 Thread Leon Romanovsky
From: Maor Gottlieb 

Extend __sg_alloc_table_from_pages to support dynamic allocation of
SG table from pages. It should be used by drivers that can't supply
all the pages at one time.

This function returns the last populated SGE in the table. Users should
pass it as an argument to the function from the second call and forward.
As before, nents will be equal to the number of populated SGEs (chunks).

With this new extension, drivers can benefit the optimization of merging
contiguous pages without a need to allocate all pages in advance and
hold them in a large buffer.

E.g. with the Infiniband driver that allocates a single page for hold the
pages. For 1TB memory registration, the temporary buffer would consume only
4KB, instead of 2GB.

Signed-off-by: Maor Gottlieb 
Reviewed-by: Christoph Hellwig 
Signed-off-by: Leon Romanovsky 
---
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c |  12 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c  |  15 ++-
 include/linux/scatterlist.h |  38 +++---
 lib/scatterlist.c   | 125 
 tools/testing/scatterlist/main.c|   9 +-
 5 files changed, 142 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 12b30075134a..f2eaed6aca3d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -403,6 +403,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object 
*obj,
unsigned int max_segment = i915_sg_segment_size();
struct sg_table *st;
unsigned int sg_page_sizes;
+   struct scatterlist *sg;
int ret;

st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -410,13 +411,12 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object 
*obj,
return ERR_PTR(-ENOMEM);

 alloc_table:
-   ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
- 0, num_pages << PAGE_SHIFT,
- max_segment,
- GFP_KERNEL);
-   if (ret) {
+   sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
+num_pages << PAGE_SHIFT, max_segment,
+NULL, 0, GFP_KERNEL);
+   if (IS_ERR(sg)) {
kfree(st);
-   return ERR_PTR(ret);
+   return ERR_CAST(sg);
}

ret = i915_gem_gtt_prepare_pages(obj, st);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index ab524ab3b0b4..f22acd398b1f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -419,6 +419,7 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
int ret = 0;
static size_t sgl_size;
static size_t sgt_size;
+   struct scatterlist *sg;

if (vmw_tt->mapped)
return 0;
@@ -441,13 +442,15 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
if (unlikely(ret != 0))
return ret;

-   ret = __sg_alloc_table_from_pages
-   (&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
-(unsigned long) vsgt->num_pages << PAGE_SHIFT,
-dma_get_max_seg_size(dev_priv->dev->dev),
-GFP_KERNEL);
-   if (unlikely(ret != 0))
+   sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
+   vsgt->num_pages, 0,
+   (unsigned long) vsgt->num_pages << PAGE_SHIFT,
+   dma_get_max_seg_size(dev_priv->dev->dev),
+   NULL, 0, GFP_KERNEL);
+   if (IS_ERR(sg)) {
+   ret = PTR_ERR(sg);
goto out_sg_alloc_fail;
+   }

if (vsgt->num_pages > vmw_tt->sgt.nents) {
uint64_t over_alloc =
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 45cf7b69d852..36c47e7e66a2 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -165,6 +165,22 @@ static inline void sg_set_buf(struct scatterlist *sg, 
const void *buf,
 #define for_each_sgtable_dma_sg(sgt, sg, i)\
for_each_sg((sgt)->sgl, sg, (sgt)->nents, i)

+static inline void __sg_chain(struct scatterlist *chain_sg,
+ struct scatterlist *sgl)
+{
+   /*
+* offset and length are unused for chain entry. Clear them.
+*/
+   chain_sg->offset = 0;
+   chain_sg->length = 0;
+
+   /*
+* Set lowest bit to indicate a link pointer, and make sure to clear
+* the termination bit if it happens to be set.
+*/
+   chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END;
+}
+
 /**
  *

[Intel-gfx] [PATCH rdma-next v5 4/4] RDMA/umem: Move to allocate SG table from pages

2020-10-04 Thread Leon Romanovsky
From: Maor Gottlieb 

Remove the implementation of ib_umem_add_sg_table and instead
call to __sg_alloc_table_from_pages which already has the logic to
merge contiguous pages.

Besides that it removes duplicated functionality, it reduces the
memory consumption of the SG table significantly. Prior to this
patch, the SG table was allocated in advance regardless consideration
of contiguous pages.

In huge pages system of 2MB page size, without this change, the SG table
would contain x512 SG entries.
E.g. for 100GB memory registration:

 Number of entries  Size
Before26214400  600.0MB
After512001.2MB

Signed-off-by: Maor Gottlieb 
Signed-off-by: Leon Romanovsky 
---
 drivers/infiniband/core/umem.c | 94 +-
 1 file changed, 12 insertions(+), 82 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index c1ab6a4f2bc3..e9fecbdf391b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -61,73 +61,6 @@ static void __ib_umem_release(struct ib_device *dev, struct 
ib_umem *umem, int d
sg_free_table(&umem->sg_head);
 }

-/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
- *
- * sg: current scatterlist entry
- * page_list: array of npage struct page pointers
- * npages: number of pages in page_list
- * max_seg_sz: maximum segment size in bytes
- * nents: [out] number of entries in the scatterlist
- *
- * Return new end of scatterlist
- */
-static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
-   struct page **page_list,
-   unsigned long npages,
-   unsigned int max_seg_sz,
-   int *nents)
-{
-   unsigned long first_pfn;
-   unsigned long i = 0;
-   bool update_cur_sg = false;
-   bool first = !sg_page(sg);
-
-   /* Check if new page_list is contiguous with end of previous page_list.
-* sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
-*/
-   if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
-  page_to_pfn(page_list[0])))
-   update_cur_sg = true;
-
-   while (i != npages) {
-   unsigned long len;
-   struct page *first_page = page_list[i];
-
-   first_pfn = page_to_pfn(first_page);
-
-   /* Compute the number of contiguous pages we have starting
-* at i
-*/
-   for (len = 0; i != npages &&
- first_pfn + len == page_to_pfn(page_list[i]) &&
- len < (max_seg_sz >> PAGE_SHIFT);
-len++)
-   i++;
-
-   /* Squash N contiguous pages from page_list into current sge */
-   if (update_cur_sg) {
-   if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
-   sg_set_page(sg, sg_page(sg),
-   sg->length + (len << PAGE_SHIFT),
-   0);
-   update_cur_sg = false;
-   continue;
-   }
-   update_cur_sg = false;
-   }
-
-   /* Squash N contiguous pages into next sge or first sge */
-   if (!first)
-   sg = sg_next(sg);
-
-   (*nents)++;
-   sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
-   first = false;
-   }
-
-   return sg;
-}
-
 /**
  * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
  *
@@ -217,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, 
unsigned long addr,
struct mm_struct *mm;
unsigned long npages;
int ret;
-   struct scatterlist *sg;
+   struct scatterlist *sg = NULL;
unsigned int gup_flags = FOLL_WRITE;

/*
@@ -272,15 +205,9 @@ struct ib_umem *ib_umem_get(struct ib_device *device, 
unsigned long addr,

cur_base = addr & PAGE_MASK;

-   ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
-   if (ret)
-   goto vma;
-
if (!umem->writable)
gup_flags |= FOLL_FORCE;

-   sg = umem->sg_head.sgl;
-
while (npages) {
cond_resched();
ret = pin_user_pages_fast(cur_base,
@@ -292,15 +219,19 @@ struct ib_umem *ib_umem_get(struct ib_device *device, 
unsigned long addr,
goto umem_release;

cur_base += ret * PAGE_SIZE;
-   npages   -= ret;
-
-   sg = ib_umem_add_sg_table(sg, page_list, ret,
-   dma_get_max_seg_size(device->dma_device),
-   &umem->sg_nent

[Intel-gfx] [PATCH rdma-next v5 2/4] tools/testing/scatterlist: Rejuvenate bit-rotten test

2020-10-04 Thread Leon Romanovsky
From: Tvrtko Ursulin 

A couple small tweaks are needed to make the test build and run
on current kernels.

Signed-off-by: Tvrtko Ursulin 
Cc: Maor Gottlieb 
Signed-off-by: Leon Romanovsky 
---
 tools/testing/scatterlist/Makefile   |  3 ++-
 tools/testing/scatterlist/linux/mm.h | 35 
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/tools/testing/scatterlist/Makefile 
b/tools/testing/scatterlist/Makefile
index cbb003d9305e..c65233876622 100644
--- a/tools/testing/scatterlist/Makefile
+++ b/tools/testing/scatterlist/Makefile
@@ -14,7 +14,7 @@ targets: include $(TARGETS)
 main: $(OFILES)

 clean:
-   $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h 
linux/highmem.h linux/kmemleak.h asm/io.h
+   $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h 
linux/highmem.h linux/kmemleak.h linux/slab.h asm/io.h
@rmdir asm

 scatterlist.c: ../../../lib/scatterlist.c
@@ -28,4 +28,5 @@ include: ../../../include/linux/scatterlist.h
@touch asm/io.h
@touch linux/highmem.h
@touch linux/kmemleak.h
+   @touch linux/slab.h
@cp $< linux/scatterlist.h
diff --git a/tools/testing/scatterlist/linux/mm.h 
b/tools/testing/scatterlist/linux/mm.h
index 6f9ac14aa800..6ae907f375d2 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -114,6 +114,12 @@ static inline void *kmalloc(unsigned int size, unsigned 
int flags)
return malloc(size);
 }

+static inline void *
+kmalloc_array(unsigned int n, unsigned int size, unsigned int flags)
+{
+   return malloc(n * size);
+}
+
 #define kfree(x) free(x)

 #define kmemleak_alloc(a, b, c, d)
@@ -122,4 +128,33 @@ static inline void *kmalloc(unsigned int size, unsigned 
int flags)
 #define PageSlab(p) (0)
 #define flush_kernel_dcache_page(p)

+#define MAX_ERRNO  4095
+
+#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned 
long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error)
+{
+   return (void *) error;
+}
+
+static inline long __must_check PTR_ERR(__force const void *ptr)
+{
+   return (long) ptr;
+}
+
+static inline bool __must_check IS_ERR(__force const void *ptr)
+{
+   return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
+{
+   if (IS_ERR(ptr))
+   return PTR_ERR(ptr);
+   else
+   return 0;
+}
+
+#define IS_ENABLED(x) (0)
+
 #endif
--
2.26.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx