[Mesa-dev] [PATCH 1/1] anv/ehl: 36bits ppgtt support

2019-04-10 Thread James Xiong
From: "Xiong, James" 

The vma high heap's capacity and maximum address were pre-defined based
on 48-bit ppgtt support, so buffers allocated from the vma high heap
had vma addresses that are invalid on the ehl platform, which only
supports 36-bit ppgtt. As a result, the KMD rejected all batchbuffers
submitted by Vulkan.

This patch:
1) initializes the vma high heap by retrieving the GTT capacity from the
KMD and calculating the heap size and max address on the fly.
2) enables softpin only when full ppgtt is enabled

V2: change commit messages and comments to reflect the changes [Bob, Jason]
remove define HIGH_HEAP_SIZE [Bob]
make sure there's enough space to enable softpin [Jason]
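
For illustration, on ehl with 36-bit ppgtt the KMD reports a GTT
capacity of 2^36 bytes = 64 GiB, so the new code works out to (a sketch
of the arithmetic, not part of the patch):

   size            = (1ull << 36) - HIGH_HEAP_MIN_ADDRESS - (1ull << 32)
                   = 64 GiB - 7 GiB - 4 GiB = 53 GiB
   vma_hi_max_addr = HIGH_HEAP_MIN_ADDRESS + size - 1   /* 60 GiB - 1 */

Every vma handed out by the high heap then fits within the 36-bit
address space, with the top 4 GiB held back as the overflow margin.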

Signed-off-by: Xiong, James 
---
 src/intel/vulkan/anv_device.c  | 30 +++---
 src/intel/vulkan/anv_private.h |  7 ---
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 88b34c4..c3eff1c 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -434,7 +434,12 @@ anv_physical_device_init(struct anv_physical_device *device,
   anv_gem_supports_syncobj_wait(fd);
device->has_context_priority = anv_gem_has_context_priority(fd);
 
+   /*
+    * Make sure there is enough VA space (i.e. more than 32 bits of address
+    * space) and full ppgtt is enabled.
+    */
    device->use_softpin = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)
+                      && (anv_gem_get_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) > 1)
                       && device->supports_48bit_addresses;
 
device->has_context_isolation =
@@ -1981,14 +1986,25 @@ VkResult anv_CreateDevice(
   device->vma_lo_available =
      physical_device->memory.heaps[physical_device->memory.heap_count - 1].size;
 
-  /* Leave the last 4GiB out of the high vma range, so that no state base
-   * address + size can overflow 48 bits. For more information see the
-   * comment about Wa32bitGeneralStateOffset in anv_allocator.c
-   */
-  util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
- HIGH_HEAP_SIZE);
   device->vma_hi_available = physical_device->memory.heap_count == 1 ? 0 :
  physical_device->memory.heaps[0].size;
+
+  /* Retrieve the GTT's capacity and leave the last 4GiB out of the high vma
+   * range, so that no state base address + size can overflow the vma range.
+   * For more information see the comment about Wa32bitGeneralStateOffset in
+   * anv_allocator.c
+   */
+  uint64_t size = 0;
+  anv_gem_get_context_param(device->fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
+&size);
+  if (size > HIGH_HEAP_MIN_ADDRESS + (1ull << 32)) {
+     size -= HIGH_HEAP_MIN_ADDRESS + (1ull << 32);
+ device->vma_hi_max_addr = HIGH_HEAP_MIN_ADDRESS + size - 1;
+  } else {
+ size = device->vma_hi_max_addr = 0;
+  }
+
+  util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS, size);
}
 
/* As per spec, the driver implementation may deny requests to acquire
@@ -2456,7 +2472,7 @@ anv_vma_free(struct anv_device *device, struct anv_bo *bo)
   device->vma_lo_available += bo->size;
} else {
   assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS &&
- addr_48b <= HIGH_HEAP_MAX_ADDRESS);
+ addr_48b <= device->vma_hi_max_addr);
   util_vma_heap_free(&device->vma_hi, addr_48b, bo->size);
   device->vma_hi_available += bo->size;
}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 1664918..ef9b012 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -109,6 +109,9 @@ struct gen_l3_config;
  * heap. Various hardware units will read past the end of an object for
  * various reasons. This healthy margin prevents reads from wrapping around
  * 48-bit addresses.
+ *
+ * (4) The high vma heap size and max address are calculated based on the
+ * GTT capacity retrieved from the KMD.
  */
 #define LOW_HEAP_MIN_ADDRESS               0x000000001000ULL /* 4 KiB */
 #define LOW_HEAP_MAX_ADDRESS               0x0000bfffffffULL
@@ -121,12 +124,9 @@ struct gen_l3_config;
 #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
 #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
 #define HIGH_HEAP_MIN_ADDRESS              0x0001c0000000ULL /* 7 GiB */
-#define HIGH_HEAP_MAX_ADDRESS              0xfffeffffffffULL
 
 #define LOW_HEAP_SIZE   \
(LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
-#define HIGH_HEAP_SIZE  \
-   (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1)
 #define DYNAMIC_STATE_POOL_SIZE \
(DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
 #define BINDING_TABLE_POOL_SIZE \
@@ -1093,6 +1093,7 @@ struct anv_device {
     struct util_vma_heap                    vma_hi;
     uint64_t                                vma_lo_available;
     uint64_t                                vma_hi_available;
+    uint64_t                                vma_hi_max_addr;

[Mesa-dev] [PATCH 1/1] iris: set default tiling mode to X-tiled

2019-04-04 Thread James Xiong
From: "Xiong, James" 

UMDs such as modesetting can use the legacy ADDFB ioctl, which doesn't
accept Y-tiled buffers. This patch sets the default tiling mode to
X-tiled for backward compatibility.
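
For context, the legacy path referred to above is the original KMS
framebuffer call, which carries no tiling or modifier information
(libdrm signature, shown here for reference only):

   int drmModeAddFB(int fd, uint32_t width, uint32_t height,
                    uint8_t depth, uint8_t bpp, uint32_t pitch,
                    uint32_t bo_handle, uint32_t *buf_id);

Y-tiled layouts generally have to be described through the
modifier-aware drmModeAddFB2WithModifiers() path, hence the X-tiled
default for consumers stuck on the legacy call.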

Signed-off-by: Xiong, James 
---
 src/gallium/drivers/iris/iris_resource.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c
index d0a473f..56c9590 100644
--- a/src/gallium/drivers/iris/iris_resource.c
+++ b/src/gallium/drivers/iris/iris_resource.c
@@ -558,7 +558,8 @@ iris_resource_create_with_modifiers(struct pipe_screen *pscreen,
uint64_t modifier =
   select_best_modifier(devinfo, modifiers, modifiers_count);
 
-   isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK;
+   /* Historically, X-tiled was the default */
+   isl_tiling_flags_t tiling_flags = ISL_TILING_X_BIT;
 
if (modifier != DRM_FORMAT_MOD_INVALID) {
   res->mod_info = isl_drm_modifier_get_info(modifier);
-- 
2.7.4


Re: [Mesa-dev] [PATCH 3/4] i965/drm: Searching for a cached buffer for reuse

2018-05-11 Thread James Xiong
On Fri, 11 May 2018 16:57:22 +0100
Chris Wilson  wrote:

> Quoting James Xiong (2018-05-11 16:35:04)
> > On Thu, 10 May 2018 13:56:12 -0700
> > Kenneth Graunke  wrote:
> > 
> > > On Friday, May 4, 2018 5:56:04 PM PDT James Xiong wrote:
> > > > From: "Xiong, James" 
> > > > 
> > > > Now that a bucket contains cached buffers with different sizes,
> > > > go through its list and search for a cached buffer with enough
> > > > size.
> > > > 
> > > > Signed-off-by: Xiong, James 
<...>
> > > > pos, member);   \ 
> > > 
> > > Hi James,
> > > 
> > > I don't particularly like the idea of introducing linear linked
> > > list walks to find a buffer.  It's a really nice property to have
> > > every buffer in the bucket be suitable and be able to grab one
> > > immediately. It sounds like purging things more often in patch 4
> > > reduces the cost of this new list walk, but...it'd be nice to not
> > > add it in the first place.
> > Yes, the purge, as well as the cleanup routine which kicks in from
> > time
> 
> It should not be time to time, it should be whenever the cache is
> older than 1s (use a timer thread if you are concerned), as it is
> checked every time something is put into the cache (and so before the
> lists grow). 
That's exactly how it works: the cleanup gets called before a buffer is
put in the bucket, and BTW there is also a check whether an earlier
cleanup has been performed within the last second.

Sorry for the misleading wording. 
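
A minimal sketch of the throttle being described (struct and field
names are illustrative, not the exact brw_bufmgr code):

   #include <time.h>

   struct bufmgr_sketch {
      time_t time; /* second of the last cache cleanup */
   };

   static void
   cleanup_bo_cache(struct bufmgr_sketch *bufmgr, time_t now)
   {
      /* Called on every insert into the cache, but rescan the buckets
       * at most once per second. */
      if (now == bufmgr->time)
         return;

      /* ... walk the buckets and free cached BOs whose free_time is
       * more than a second old ... */

      bufmgr->time = now;
   }
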
> If it is overallocating such that you are running out of
> memory and not reusing buffers, the cache is barely functioning. Now
> that's the information you want to present, where/when/why/how the
> cache is not working. What is being put into the cache that is never
> reused? -Chris
I tried to clean up cached buffers more aggressively, every single time
a buffer is unreferenced, and measured the memory usage; it didn't help
much. I believe cached-but-never-reused buffers might contribute to the
overhead too, but mostly it comes from the bucket size roundup.



Re: [Mesa-dev] [PATCH 3/4] i965/drm: Searching for a cached buffer for reuse

2018-05-11 Thread James Xiong
On Thu, 10 May 2018 13:56:12 -0700
Kenneth Graunke  wrote:

> On Friday, May 4, 2018 5:56:04 PM PDT James Xiong wrote:
> > From: "Xiong, James" 
> > 
> > Now that a bucket contains cached buffers with different sizes, go
> > through its list and search for a cached buffer with enough size.
> > 
> > Signed-off-by: Xiong, James 
> > ---
> >  src/mesa/drivers/dri/i965/brw_bufmgr.c | 21 +++--
> >  src/util/list.h|  5 +
> >  2 files changed, 20 insertions(+), 6 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 6a9b005..5235aa6
> > 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > @@ -281,7 +281,7 @@ cached_bo_for_size(struct brw_bufmgr *bufmgr,
> > assert(!(busy && zeroed));
> >  
> > if(bucket != NULL && !list_empty(&bucket->head)) {
> > -  struct brw_bo *bo;
> > +  struct brw_bo *bo, *temp_bo;
> >  retry:
> >bo = NULL;
> >  
> > @@ -292,8 +292,13 @@ retry:
> >* asked us to zero the buffer, we don't want this
> >* because we are going to mmap it.
> >*/
> > - bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
> > - list_del(&bo->head);
> > + LIST_FOR_EACH_ENTRY_REV(temp_bo, &bucket->head, head) {
> > +if (temp_bo->size >= size) {
> > +   bo = temp_bo;
> > +   list_del(&bo->head);
> > +   break;
> > +}
> > + }
> >} else {
> >   /* For non-render-target BOs (where we're probably
> >* going to map it first thing in order to fill it
> > @@ -302,9 +307,13 @@ retry:
> >* allocating a new buffer is probably faster than
> >* waiting for the GPU to finish.
> >*/
> > - bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
> > - if (!brw_bo_busy(bo)) {
> > -list_del(&bo->head);
> > + LIST_FOR_EACH_ENTRY(temp_bo, &bucket->head, head) {
> > +if (temp_bo->size >= size &&
> > +!brw_bo_busy(temp_bo)) {
> > +   bo = temp_bo;
> > +   list_del(&bo->head);
> > +   break;
> > +}
> >   }
> >}
> >  
> > diff --git a/src/util/list.h b/src/util/list.h
> > index 6edb750..9362072 100644
> > --- a/src/util/list.h
> > +++ b/src/util/list.h
> > @@ -189,6 +189,11 @@ static inline void list_validate(struct
> > list_head *list) &pos->member !=
> > (head); \ pos =
> > container_of(pos->member.next, pos, member)) 
> > +#define LIST_FOR_EACH_ENTRY_REV(pos, head,
> > member)  \
> > +   for (pos = NULL, pos = container_of((head)->prev, pos,
> > member);  \
> > +&pos->member !=
> > (head); \
> > +pos = container_of(pos->member.prev, pos, member))
> > +
> >  #define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head,
> > member) \ for (pos = NULL, pos = container_of((head)->next,
> > pos, member),   \ storage = container_of(pos->member.next,
> > pos, member);   \ 
> 
> Hi James,
> 
> I don't particularly like the idea of introducing linear linked list
> walks to find a buffer.  It's a really nice property to have every
> buffer in the bucket be suitable and be able to grab one immediately.
> It sounds like purging things more often in patch 4 reduces the cost
> of this new list walk, but...it'd be nice to not add it in the first
> place.
Yes, the purge, as well as the cleanup routine which kicks in from time
to time and removes any cached buffers freed 1+ second ago, keeps the
list short. One thing I can think of is to keep a fixed-size array of
cached buffers so that we could use a binary search instead of walking
the doubly-linked list.
> 
> This also conflicts with my new VMA allocator approach a lot, which
> assumes that all buffers in a bucket are the same size...
Yes.
> 
> Still, you've shown some pretty nice memory savings.  Scott and I were
> wondering if it would be possible to achieve a similar effect by
> tuning the bucket sizes - either adding more, or adjusting some.  The
> sizes we have are pretty arbitrary - we started with po

Re: [Mesa-dev] [PATCH 4/4] i965/drm: Purge the bucket when its cached buffer is evicted

2018-05-07 Thread James Xiong
On Sat, 5 May 2018 08:55:24 +0100
Chris Wilson  wrote:

> Quoting James Xiong (2018-05-05 01:56:05)
> > From: "Xiong, James" 
> > 
> > When one of cached buffers is found to be evicted by kernel,
> > most likely the buffers freed earlier than this buffer are
> > gone too, go through the cached list in the bucket and purge.  
> 
> The order in this list bears little relevance to the MRU lists in the
> kernel. (I wish it did.) So why? The list will be processed in due
> course, so why pay the cost now? Even in the worst case, they will be
> cleaned up as we are in the middle of processing the lists.
> -Chris
This is the existing logic. I think it's because the free time is the
only hint in user land that indicates whether a buffer has been evicted
or not; maybe we could associate a fence with each cached buffer in the
future, or something like that, but at the moment I don't know how to
improve it. I just followed the logic and made it work with the new
implementation; the idea is still the same as before.

The cleanup routine (cleanup_bo_cache?) is only triggered when a
buffer's refcount goes down to 0; my guess is that's not frequent
enough, so we need the purge to keep the cached list short. Anyway, if
we do need to improve this part, we can do it in another patch.


Re: [Mesa-dev] [PATCH 0/4] improve buffer cache and reuse

2018-05-07 Thread James Xiong
On Sat, 5 May 2018 09:11:28 +0100
Chris Wilson  wrote:

> Quoting James Xiong (2018-05-05 01:56:01)
> > This series align the buffer size up to page instead of a bucket
> > size to improve memory allocation efficiency.  
> 
> It doesn't though. It still retrieves up to the bucket size, so with a
> little cache poisoning (or a series of unfortunate events) it will be
> no better than before.
> 
> Perhaps open with the problem statement. What is it you are trying to
> fix? Would adding metrics to the buffer cache be a good start to
> demonstrating what needs improving?
> -Chris
In the worst case it is the same as before; the best case, however,
reduces the allocated size by about 25% of the requested size. In the
real world, the best case was the gl_5_off test, where the patch saved
102749K (100+ MB, about 9%) out of 1143593K; the worst case saved only
64K out of 12730K.

The current implementation allocates 0% to 25% more memory than the
requested size, with or without reuse enabled. I am trying to reduce
that memory penalty.


[Mesa-dev] [PATCH 4/4] i965/drm: Purge the bucket when its cached buffer is evicted

2018-05-04 Thread James Xiong
From: "Xiong, James" 

When one of the cached buffers is found to have been evicted by the
kernel, the buffers freed earlier than it are most likely gone too, so
go through the cached list in the bucket and purge.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 5235aa6..9f2e566 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -248,17 +248,20 @@ brw_bo_madvise(struct brw_bo *bo, int state)
return madv.retained;
 }
 
-/* drop the oldest entries that have been purged by the kernel */
+/* drop the entries that are older than the given time */
 static void
 brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
-  struct bo_cache_bucket *bucket)
+  struct bo_cache_bucket *bucket,
+  time_t time)
 {
list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
-  if (brw_bo_madvise(bo, I915_MADV_DONTNEED))
+  if (bo->free_time >= time) {
+ brw_bo_madvise(bo, I915_MADV_DONTNEED);
+ list_del(&bo->head);
+ bo_free(bo);
+  } else {
  break;
-
-  list_del(&bo->head);
-  bo_free(bo);
+  }
}
 }
 
@@ -319,8 +322,8 @@ retry:
 
   if (bo) {
  if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
+brw_bo_cache_purge_bucket(bufmgr, bucket, bo->free_time);
 bo_free(bo);
-brw_bo_cache_purge_bucket(bufmgr, bucket);
 return NULL;
  }
 
-- 
2.7.4



[Mesa-dev] [PATCH 0/4] improve buffer cache and reuse

2018-05-04 Thread James Xiong
From: "Xiong, James" 

With the current implementation, brw_bufmgr may round up a request
size to the next bucket size, resulting in up to 25% more memory
allocated in the worst scenario. For example:
Request size    Actual size
32KB+1Byte  40KB
.
8MB+1Byte   10MB
.
96MB+1Byte  112MB
This series aligns the buffer size up to the page size instead of a
bucket size to improve memory allocation efficiency.
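
The up-to-25% figure follows from the bucket layout documented in
bucket_for_size(): the buckets grow in rows of four, each row spacing
its entries a quarter of the row's base size apart (sizes in pages):

   1   2   3   4
   5   6   7   8
   10  12  14  16
   20  24  28  32
   ...

so a request one byte past a bucket size used to be rounded up by as
much as a quarter of its size (e.g. the 8MB+1Byte -> 10MB case above).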

Performance and memory usage were measured on a gen9 platform using
Basemark ES3, GfxBench 4 and 5; each test case was run 6 times.

Basemark ES3
            score                          peak memory size (KB)
    before      after       diff           before  after   diff
    max  avg    max  avg    max     avg
    22   21     23   21     2.83%   1.21%  409928  395573  -14355
    20   20     20   20     0.53%   0.41%

GfxBench 4.0
            score                            peak memory size (KB)
    before       after        diff           before  after   diff
    max   avg    max   avg    max     avg
    584   577    586   583    0.45%   1.02%   566489  539699  -26791
    728   727    727   726    -0.03%  -0.16%  614730  586794  -27936
    1604  1144   1650  1202   2.81%   4.86%   439220  411596  -27624
    2711  -      2718  2152   0.25%   -3.25%  126065  121398  -4667
    1218  1213   1212  1154   -0.53%  -5.10%  54153   53868   -285
    106   104    106   103    0.85%   -1.66%  12730   12666   -64
    1732  1709   1740  1728   0.49%   1.11%   475716  447726  -27990
    3051  2969   3066  3047   0.50%   2.55%   154169  148962  -5207
    2626  2607   2626  2625   0.00%   0.70%   84119   83150   -969
    211   208    208   205    -1.26%  -1.21%  39924   39667   -257

GfxBench 5.0
            score                           peak memory size (KB)
    before      after       diff            before   after    diff
    max  avg    max  avg    max     avg
    260  258    259  256    -0.39%  -0.85%  1111037  1013520  -97517
    298  295    298  297    0.00%   0.45%   1143593  1040844  -102749

Xiong, James (4):
  i965/drm: Reorganize code for the next patch
  i965/drm: Round down buffer size and calculate the bucket index
  i965/drm: Searching for a cached buffer for reuse
  i965/drm: Purge the bucket when its cached buffer is evicted

 src/mesa/drivers/dri/i965/brw_bufmgr.c | 139 ++---
 src/util/list.h|   5 ++
 2 files changed, 79 insertions(+), 65 deletions(-)

-- 
2.7.4



[Mesa-dev] [PATCH 2/4] i965/drm: Round down buffer size and calculate the bucket index

2018-05-04 Thread James Xiong
From: "Xiong, James" 

A buffer is now put in cache bucket #n when its size is between
bucket[n].size and bucket[n+1].size - 1
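
For example (illustrative, using the existing bucket sizes of 8 and 10
pages with 4 KiB pages): a 9-page (36 KiB) buffer was previously
rounded up and filed in the 10-page bucket; with this change it is
filed in the 8-page bucket, i.e. bucket #n now holds every buffer in
[bucket[n].size, bucket[n+1].size - 1].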

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index e68da26..6a9b005 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -189,8 +189,8 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling)
 static struct bo_cache_bucket *
 bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
 {
-   /* Calculating the pages and rounding up to the page size. */
-   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+   /* Calculating the pages and rounding down to the page size. */
+   const unsigned pages = (size < PAGE_SIZE) ? 1 : size / PAGE_SIZE;
 
    /* Row  Bucket sizes    clz((x-1) | 3)   Row     Column
     *      in pages                         stride  size
@@ -211,8 +211,7 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
int col_size_log2 = row - 1;
col_size_log2 += (col_size_log2 < 0);
 
-   const unsigned col = (pages - prev_row_max_pages +
-((1 << col_size_log2) - 1)) >> col_size_log2;
+   const unsigned col = (pages - prev_row_max_pages) >> col_size_log2;
 
/* Calculating the index based on the row and column. */
const unsigned index = (row * 4) + (col - 1);
@@ -1285,9 +1284,9 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
bufmgr->cache_bucket[i].size = size;
bufmgr->num_buckets++;
 
+   assert(bucket_for_size(bufmgr, size - 1) ==
+          &bufmgr->cache_bucket[i == 0 ? 0 : i - 1]);
assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
-   assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
-   assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size + 1) == &bufmgr->cache_bucket[i]);
 }
 
 static void
-- 
2.7.4



[Mesa-dev] [PATCH 1/4] i965/drm: Reorganize code for the next patch

2018-05-04 Thread James Xiong
From: "Xiong, James" 

Split bo_alloc_internal, and add a new function, cached_bo_for_size,
that searches for a suitable cached buffer for a given size.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 92 +-
 1 file changed, 45 insertions(+), 47 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 7cb1f03..e68da26 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -263,53 +263,29 @@ brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
}
 }
 
+/* search for a suitable cached bo for reuse */
 static struct brw_bo *
-bo_alloc_internal(struct brw_bufmgr *bufmgr,
-  const char *name,
-  uint64_t size,
-  unsigned flags,
-  uint32_t tiling_mode,
-  uint32_t stride)
+cached_bo_for_size(struct brw_bufmgr *bufmgr,
+   uint64_t size,
+   uint32_t tiling_mode,
+   uint32_t stride,
+   unsigned flags)
 {
-   struct brw_bo *bo;
-   unsigned int page_size = getpagesize();
-   int ret;
-   struct bo_cache_bucket *bucket;
-   bool alloc_from_cache;
-   uint64_t bo_size;
-   bool busy = false;
-   bool zeroed = false;
-
-   if (flags & BO_ALLOC_BUSY)
-  busy = true;
-
-   if (flags & BO_ALLOC_ZEROED)
-  zeroed = true;
+   bool busy = (flags & BO_ALLOC_BUSY) ? true : false;
+   bool zeroed = (flags & BO_ALLOC_ZEROED) ? true : false;
+   struct bo_cache_bucket *bucket =
+  (bufmgr->bo_reuse) ? bucket_for_size(bufmgr, size) : NULL;
 
/* BUSY does doesn't really jive with ZEROED as we have to wait for it to
 * be idle before we can memset.  Just disallow that combination.
 */
assert(!(busy && zeroed));
 
-   /* Round the allocated size up to a power of two number of pages. */
-   bucket = bucket_for_size(bufmgr, size);
-
-   /* If we don't have caching at this size, don't actually round the
-* allocation up.
-*/
-   if (bucket == NULL) {
-  bo_size = size;
-  if (bo_size < page_size)
- bo_size = page_size;
-   } else {
-  bo_size = bucket->size;
-   }
-
-   mtx_lock(&bufmgr->lock);
-   /* Get a buffer out of the cache if available */
+   if(bucket != NULL && !list_empty(&bucket->head)) {
+  struct brw_bo *bo;
 retry:
-   alloc_from_cache = false;
-   if (bucket != NULL && !list_empty(&bucket->head)) {
+  bo = NULL;
+
   if (busy && !zeroed) {
  /* Allocate new render-target BOs from the tail (MRU)
   * of the list, as it will likely be hot in the GPU
@@ -319,7 +295,6 @@ retry:
   */
  bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
  list_del(&bo->head);
- alloc_from_cache = true;
   } else {
  /* For non-render-target BOs (where we're probably
   * going to map it first thing in order to fill it
@@ -330,16 +305,15 @@ retry:
   */
  bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
  if (!brw_bo_busy(bo)) {
-alloc_from_cache = true;
 list_del(&bo->head);
  }
   }
 
-  if (alloc_from_cache) {
+  if (bo) {
  if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
 bo_free(bo);
 brw_bo_cache_purge_bucket(bufmgr, bucket);
-goto retry;
+return NULL;
  }
 
  if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
@@ -353,20 +327,44 @@ retry:
bo_free(bo);
goto retry;
 }
-memset(map, 0, bo_size);
+memset(map, 0, bo->size);
  }
   }
+
+  return bo;
}
 
-   if (!alloc_from_cache) {
+   return NULL;
+}
+
+static struct brw_bo *
+bo_alloc_internal(struct brw_bufmgr *bufmgr,
+  const char *name,
+  uint64_t size,
+  unsigned flags,
+  uint32_t tiling_mode,
+  uint32_t stride)
+{
+   struct brw_bo *bo;
+   int ret;
+
+   /* align the request size to page size */
+   size = ALIGN(size, getpagesize());
+
+   mtx_lock(&bufmgr->lock);
+
+   /* Get a buffer out of the cache if available */
+   bo = cached_bo_for_size(bufmgr, size, tiling_mode, stride, flags);
+
+   if (bo == NULL) {
   bo = calloc(1, sizeof(*bo));
   if (!bo)
  goto err;
 
-  bo->size = bo_size;
+  bo->size = size;
   bo->idle = true;
 
-  struct drm_i915_gem_create create = { .size = bo_size };
+  struct drm_i915_gem_create create = { .size = size };
 
   /* All new BOs we get from the kernel are zeroed, so we don't need to
* worry about that here.
@@ -413,7 +411,7 @@ retry:
mtx_unlock(&bufmgr->lock);
 
DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name,
-   (unsigned long long) size);
+   (unsigned long long) bo->size);
 
return bo;
 
-- 
2.7.4


[Mesa-dev] [PATCH 3/4] i965/drm: Searching for a cached buffer for reuse

2018-05-04 Thread James Xiong
From: "Xiong, James" 

Now that a bucket contains cached buffers with different sizes, go
through its list and search for a cached buffer that is large enough.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 21 +++--
 src/util/list.h|  5 +
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 6a9b005..5235aa6 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -281,7 +281,7 @@ cached_bo_for_size(struct brw_bufmgr *bufmgr,
assert(!(busy && zeroed));
 
if(bucket != NULL && !list_empty(&bucket->head)) {
-  struct brw_bo *bo;
+  struct brw_bo *bo, *temp_bo;
 retry:
   bo = NULL;
 
@@ -292,8 +292,13 @@ retry:
   * asked us to zero the buffer, we don't want this
   * because we are going to mmap it.
   */
- bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
- list_del(&bo->head);
+ LIST_FOR_EACH_ENTRY_REV(temp_bo, &bucket->head, head) {
+if (temp_bo->size >= size) {
+   bo = temp_bo;
+   list_del(&bo->head);
+   break;
+}
+ }
   } else {
  /* For non-render-target BOs (where we're probably
   * going to map it first thing in order to fill it
@@ -302,9 +307,13 @@ retry:
   * allocating a new buffer is probably faster than
   * waiting for the GPU to finish.
   */
- bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
- if (!brw_bo_busy(bo)) {
-list_del(&bo->head);
+ LIST_FOR_EACH_ENTRY(temp_bo, &bucket->head, head) {
+if (temp_bo->size >= size &&
+!brw_bo_busy(temp_bo)) {
+   bo = temp_bo;
+   list_del(&bo->head);
+   break;
+}
  }
   }
 
diff --git a/src/util/list.h b/src/util/list.h
index 6edb750..9362072 100644
--- a/src/util/list.h
+++ b/src/util/list.h
@@ -189,6 +189,11 @@ static inline void list_validate(struct list_head *list)
&pos->member != (head); \
pos = container_of(pos->member.next, pos, member))
 
+#define LIST_FOR_EACH_ENTRY_REV(pos, head, member)  \
+   for (pos = NULL, pos = container_of((head)->prev, pos, member);  \
+&pos->member != (head); \
+pos = container_of(pos->member.prev, pos, member))
+
 #define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member)   \
for (pos = NULL, pos = container_of((head)->next, pos, member), \
storage = container_of(pos->member.next, pos, member);  \
-- 
2.7.4



Re: [Mesa-dev] [PATCH 0/4] improve buffer cache and reuse

2018-05-02 Thread James Xiong
On Wed, 2 May 2018 14:18:21 +0300
Eero Tamminen  wrote:

> Hi,
> 
> On 02.05.2018 02:25, James Xiong wrote:
> > From: "Xiong, James" 
> > 
> > With the current implementation, brw_bufmgr may round up a request
> > size to the next bucket size, resulting in up to 25% more memory
> > allocated in the worst scenario. For example:
> > Request size    Actual size
> > 32KB+1Byte  40KB
> > .
> > 8MB+1Byte   10MB
> > .
> > 96MB+1Byte  112MB
> > This series aligns the buffer size up to the page size instead of a
> > bucket size to improve memory allocation efficiency. Performance is
> > almost the same with Basemark ES3, GfxBench 4 and 5:
> > 
> > Basemark ES3
> >     score                         peak memory allocation
> >     before     after      diff    before     after      diff
> > 21.537462  21.888784  1.61%   419766272  408809472  -10956800
> > 19.566198  19.763429  1.00%
> 
> What memory you're measuring:
> 
> * VmSize (not that relevant unless you're running out of address
> space)?
> 
> * PrivateDirty (listed in /proc/PID/smaps and e.g. by "smem" tool
> [1])?
> 
> * total of allocation sizes used by Mesa?
> 
> Or something else?
> 
> In general, unused memory isn't much of a problem, only dirty
> (written) memory.  Kernel maps all unused memory to a single zero
> page, so unused memory takes only few bytes of RAM for the page table
> entries (required for tracking the allocation pages).
I did the measurements in brw_bufmgr from user space: I kept track of
the allocated size for each brw_bufmgr context and printed out the
peak allocated size when the test completed and the context was
destroyed. Basically I increased/decreased the counter when
I915_GEM_CREATE or GEM_CLOSE was called, so cached buffers, imported
buffers, and user_ptr buffers were excluded.

The brw_bufmgr context is created when the test starts and destroyed
after it completes; the size covers the whole test case and is in
bytes. This method measures the exact size allocated for a given test
case, and the result is precise.
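
A sketch of the accounting just described (field and function names
are illustrative, not actual brw_bufmgr members):

   #include <stdint.h>

   struct bufmgr_stats {
      uint64_t current_bytes;
      uint64_t peak_bytes;
   };

   /* On DRM_IOCTL_I915_GEM_CREATE: count the new allocation. */
   static void
   note_gem_create(struct bufmgr_stats *s, uint64_t size)
   {
      s->current_bytes += size;
      if (s->current_bytes > s->peak_bytes)
         s->peak_bytes = s->current_bytes;
   }

   /* On DRM_IOCTL_GEM_CLOSE: the object is really gone. */
   static void
   note_gem_close(struct bufmgr_stats *s, uint64_t size)
   {
      s->current_bytes -= size;
   }
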
> 
> 
> > GfxBench 4.0
> >                  score                                   peak memory
> >                  before          after           diff    before    after     diff
> > gl_4             564.6052246094  565.2348632813  0.11%   578490368 550199296 -28291072
> > gl_4_off         727.0440063477  703.5833129883  -3.33%  629501952 598216704 -31285248
> > gl_manhattan     1053.4223632813 1057.3690185547 0.37%   449568768 421134336 -28434432
> > gl_trex          2708.0656738281 2699.2646484375 -0.33%  130076672 125042688 -5033984
> > gl_alu2          1207.1490478516 1212.2220458984 0.42%   55496704  55029760  -466944
> > gl_driver2       103.0383071899  103.5478439331  0.49%   13107200  12980224  -126976
> > gl_manhattan_off 1703.4780273438 1736.9074707031 1.92%   490016768 456548352 -33468416
> > gl_trex_off      2951.6809082031 3058.5422363281 3.49%   157511680 152260608 -5251072
> > gl_alu2_off      2604.0903320313 2626.2524414063 0.84%   86130688  85483520  -647168
> > gl_driver2_off   204.0173187256  207.0510101318  1.47%   40869888  40615936  -253952
> 
> You're missing information on:
> * On which plaform you did the testing (affects variance)
> * how many test rounds you ran, and
> * what is your variance
I ran these tests on a gen9 platform with Ubuntu 17.10 LTS. Most of the
tests are consistent, especially in memory usage. The only exception is
GfxBench 4.0 gl_manhattan; I had to run it 3 times and pick the highest
score. I will apply this method to all tests and re-send with updated
results.
> 
> -> I don't know whether your numbers are just random noise.  
> 
> 
> Memory is allocated in pages from kernel, so there's no point in
> showing its usage as bytes.  Please use KBs, that's more readable.
> 
> (Because of randomness e.g. interactions with the windowing system, 
> there can be some variance also in process memory usage, which may
> also be useful to report.)
> 
> Because of variance, you don't need that decimals for the scores. 
> Removing the extra ones makes that data a bit more readable too.
> 
> 
>   - Eero
> 
> [1] "smem" is python based tool available at least in Debian.
> If you want something simpler, e.g. shell script working with
> minimal shells like Busybox, you can use this:
> https://github.com/maemo-tools-old/sp-memusage/blob/master/scripts/mem-smaps-private
> 
> 
> > GfxBench 5.0
> >  score   peak memory
> >   beforeafter   b

[Mesa-dev] [PATCH 1/4] i965/drm: Reorganize code for the next patch

2018-05-01 Thread James Xiong
From: "Xiong, James" 

Split bo_alloc_internal, and add a new function, cached_bo_for_size,
that searches for a suitable cached buffer for a given size.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 92 +-
 1 file changed, 45 insertions(+), 47 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 7cb1f03..e68da26 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -263,53 +263,29 @@ brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
}
 }
 
+/* search for a suitable cached bo for reuse */
 static struct brw_bo *
-bo_alloc_internal(struct brw_bufmgr *bufmgr,
-  const char *name,
-  uint64_t size,
-  unsigned flags,
-  uint32_t tiling_mode,
-  uint32_t stride)
+cached_bo_for_size(struct brw_bufmgr *bufmgr,
+   uint64_t size,
+   uint32_t tiling_mode,
+   uint32_t stride,
+   unsigned flags)
 {
-   struct brw_bo *bo;
-   unsigned int page_size = getpagesize();
-   int ret;
-   struct bo_cache_bucket *bucket;
-   bool alloc_from_cache;
-   uint64_t bo_size;
-   bool busy = false;
-   bool zeroed = false;
-
-   if (flags & BO_ALLOC_BUSY)
-  busy = true;
-
-   if (flags & BO_ALLOC_ZEROED)
-  zeroed = true;
+   bool busy = (flags & BO_ALLOC_BUSY) ? true : false;
+   bool zeroed = (flags & BO_ALLOC_ZEROED) ? true : false;
+   struct bo_cache_bucket *bucket =
+  (bufmgr->bo_reuse) ? bucket_for_size(bufmgr, size) : NULL;
 
/* BUSY does doesn't really jive with ZEROED as we have to wait for it to
 * be idle before we can memset.  Just disallow that combination.
 */
assert(!(busy && zeroed));
 
-   /* Round the allocated size up to a power of two number of pages. */
-   bucket = bucket_for_size(bufmgr, size);
-
-   /* If we don't have caching at this size, don't actually round the
-* allocation up.
-*/
-   if (bucket == NULL) {
-  bo_size = size;
-  if (bo_size < page_size)
- bo_size = page_size;
-   } else {
-  bo_size = bucket->size;
-   }
-
-   mtx_lock(&bufmgr->lock);
-   /* Get a buffer out of the cache if available */
+   if(bucket != NULL && !list_empty(&bucket->head)) {
+  struct brw_bo *bo;
 retry:
-   alloc_from_cache = false;
-   if (bucket != NULL && !list_empty(&bucket->head)) {
+  bo = NULL;
+
   if (busy && !zeroed) {
  /* Allocate new render-target BOs from the tail (MRU)
   * of the list, as it will likely be hot in the GPU
@@ -319,7 +295,6 @@ retry:
   */
  bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
  list_del(&bo->head);
- alloc_from_cache = true;
   } else {
  /* For non-render-target BOs (where we're probably
   * going to map it first thing in order to fill it
@@ -330,16 +305,15 @@ retry:
   */
  bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
  if (!brw_bo_busy(bo)) {
-alloc_from_cache = true;
 list_del(&bo->head);
  }
   }
 
-  if (alloc_from_cache) {
+  if (bo) {
  if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
 bo_free(bo);
 brw_bo_cache_purge_bucket(bufmgr, bucket);
-goto retry;
+return NULL;
  }
 
  if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
@@ -353,20 +327,44 @@ retry:
bo_free(bo);
goto retry;
 }
-memset(map, 0, bo_size);
+memset(map, 0, bo->size);
  }
   }
+
+  return bo;
}
 
-   if (!alloc_from_cache) {
+   return NULL;
+}
+
+static struct brw_bo *
+bo_alloc_internal(struct brw_bufmgr *bufmgr,
+  const char *name,
+  uint64_t size,
+  unsigned flags,
+  uint32_t tiling_mode,
+  uint32_t stride)
+{
+   struct brw_bo *bo;
+   int ret;
+
+   /* align the request size to page size */
+   size = ALIGN(size, getpagesize());
+
+   mtx_lock(&bufmgr->lock);
+
+   /* Get a buffer out of the cache if available */
+   bo = cached_bo_for_size(bufmgr, size, tiling_mode, stride, flags);
+
+   if (bo == NULL) {
   bo = calloc(1, sizeof(*bo));
   if (!bo)
  goto err;
 
-  bo->size = bo_size;
+  bo->size = size;
   bo->idle = true;
 
-  struct drm_i915_gem_create create = { .size = bo_size };
+  struct drm_i915_gem_create create = { .size = size };
 
   /* All new BOs we get from the kernel are zeroed, so we don't need to
* worry about that here.
@@ -413,7 +411,7 @@ retry:
mtx_unlock(&bufmgr->lock);
 
DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name,
-   (unsigned long long) size);
+   (unsigned long long) bo->size);
 
return bo;
 
-- 
2.7.4


[Mesa-dev] [PATCH 3/4] i965/drm: Searching for a cached buffer for reuse

2018-05-01 Thread James Xiong
From: "Xiong, James" 

Now that a bucket contains cached buffers with different sizes, go
through its list and search for a cached buffer that is large enough.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 21 +++--
 src/util/list.h|  5 +
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 6a9b005..5235aa6 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -281,7 +281,7 @@ cached_bo_for_size(struct brw_bufmgr *bufmgr,
assert(!(busy && zeroed));
 
if(bucket != NULL && !list_empty(&bucket->head)) {
-  struct brw_bo *bo;
+  struct brw_bo *bo, *temp_bo;
 retry:
   bo = NULL;
 
@@ -292,8 +292,13 @@ retry:
   * asked us to zero the buffer, we don't want this
   * because we are going to mmap it.
   */
- bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
- list_del(&bo->head);
+ LIST_FOR_EACH_ENTRY_REV(temp_bo, &bucket->head, head) {
+if (temp_bo->size >= size) {
+   bo = temp_bo;
+   list_del(&bo->head);
+   break;
+}
+ }
   } else {
  /* For non-render-target BOs (where we're probably
   * going to map it first thing in order to fill it
@@ -302,9 +307,13 @@ retry:
   * allocating a new buffer is probably faster than
   * waiting for the GPU to finish.
   */
- bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
- if (!brw_bo_busy(bo)) {
-list_del(&bo->head);
+ LIST_FOR_EACH_ENTRY(temp_bo, &bucket->head, head) {
+if (temp_bo->size >= size &&
+!brw_bo_busy(temp_bo)) {
+   bo = temp_bo;
+   list_del(&bo->head);
+   break;
+}
  }
   }
 
diff --git a/src/util/list.h b/src/util/list.h
index 6edb750..9362072 100644
--- a/src/util/list.h
+++ b/src/util/list.h
@@ -189,6 +189,11 @@ static inline void list_validate(struct list_head *list)
&pos->member != (head); \
pos = container_of(pos->member.next, pos, member))
 
+#define LIST_FOR_EACH_ENTRY_REV(pos, head, member)  \
+   for (pos = NULL, pos = container_of((head)->prev, pos, member);  \
+&pos->member != (head); \
+pos = container_of(pos->member.prev, pos, member))
+
 #define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member)   \
for (pos = NULL, pos = container_of((head)->next, pos, member), \
storage = container_of(pos->member.next, pos, member);  \
-- 
2.7.4



[Mesa-dev] [PATCH 0/4] improve buffer cache and reuse

2018-05-01 Thread James Xiong
From: "Xiong, James" 

With the current implementation, brw_bufmgr may round up a request
size to the next bucket size, resulting in up to 25% more memory
allocated in the worst scenario. For example:
Request size    Actual size
32KB+1Byte  40KB
.
8MB+1Byte   10MB
.
96MB+1Byte  112MB
This series aligns the buffer size up to the page size instead of a
bucket size to improve memory allocation efficiency. Performance is
almost the same with Basemark ES3, GfxBench 4 and 5:

Basemark ES3
    score                         peak memory allocation
    before     after      diff    before     after      diff
21.537462  21.888784  1.61%   419766272  408809472  -10956800
19.566198  19.763429  1.00%

GfxBench 4.0
                 score                                   peak memory
                 before          after           diff    before    after     diff
gl_4             564.6052246094  565.2348632813  0.11%   578490368 550199296 -28291072
gl_4_off         727.0440063477  703.5833129883  -3.33%  629501952 598216704 -31285248
gl_manhattan     1053.4223632813 1057.3690185547 0.37%   449568768 421134336 -28434432
gl_trex          2708.0656738281 2699.2646484375 -0.33%  130076672 125042688 -5033984
gl_alu2          1207.1490478516 1212.2220458984 0.42%   55496704  55029760  -466944
gl_driver2       103.0383071899  103.5478439331  0.49%   13107200  12980224  -126976
gl_manhattan_off 1703.4780273438 1736.9074707031 1.92%   490016768 456548352 -33468416
gl_trex_off      2951.6809082031 3058.5422363281 3.49%   157511680 152260608 -5251072
gl_alu2_off      2604.0903320313 2626.2524414063 0.84%   86130688  85483520  -647168
gl_driver2_off   204.0173187256  207.0510101318  1.47%   40869888  40615936  -253952

GfxBench 5.0
          score          peak memory
          before  after  before      after       diff
gl_5      259     259    1137549312  1038286848  -99262464
gl_5_off  297     297    1170853888  1071357952  -99495936

Xiong, James (4):
  i965/drm: Reorganize code for the next patch
  i965/drm: Round down buffer size and calculate the bucket index
  i965/drm: Searching for a cached buffer for reuse
  i965/drm: Purge the bucket when its cached buffer is evicted

 src/mesa/drivers/dri/i965/brw_bufmgr.c | 139 ++---
 src/util/list.h|   5 ++
 2 files changed, 79 insertions(+), 65 deletions(-)

-- 
2.7.4



[Mesa-dev] [PATCH 2/4] i965/drm: Round down buffer size and calculate the bucket index

2018-05-01 Thread James Xiong
From: "Xiong, James" 

A buffer is now put in cache bucket #n when its size is between
bucket[n].size and bucket[n+1].size - 1

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index e68da26..6a9b005 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -189,8 +189,8 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling)
 static struct bo_cache_bucket *
 bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
 {
-   /* Calculating the pages and rounding up to the page size. */
-   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+   /* Calculating the pages and rounding down to the page size. */
+   const unsigned pages = (size < PAGE_SIZE) ? 1 : size / PAGE_SIZE;
 
    /* Row  Bucket sizes    clz((x-1) | 3)   Row     Column
     *      in pages                         stride  size
@@ -211,8 +211,7 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
int col_size_log2 = row - 1;
col_size_log2 += (col_size_log2 < 0);
 
-   const unsigned col = (pages - prev_row_max_pages +
-((1 << col_size_log2) - 1)) >> col_size_log2;
+   const unsigned col = (pages - prev_row_max_pages) >> col_size_log2;
 
/* Calculating the index based on the row and column. */
const unsigned index = (row * 4) + (col - 1);
@@ -1285,9 +1284,9 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
bufmgr->cache_bucket[i].size = size;
bufmgr->num_buckets++;
 
+   assert(bucket_for_size(bufmgr, size - 1) ==
+          &bufmgr->cache_bucket[i == 0 ? 0 : i - 1]);
assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
-   assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
-   assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size + 1) == &bufmgr->cache_bucket[i]);
 }
 
 static void
-- 
2.7.4



[Mesa-dev] [PATCH 4/4] i965/drm: Purge the bucket when its cached buffer is evicted

2018-05-01 Thread James Xiong
From: "Xiong, James" 

When one of the cached buffers is found to have been evicted by the
kernel, the buffers freed earlier than it are most likely gone too, so
go through the cached list in the bucket and purge.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 5235aa6..9f2e566 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -248,17 +248,20 @@ brw_bo_madvise(struct brw_bo *bo, int state)
return madv.retained;
 }
 
-/* drop the oldest entries that have been purged by the kernel */
+/* drop the entries that are older than the given time */
 static void
 brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
-  struct bo_cache_bucket *bucket)
+  struct bo_cache_bucket *bucket,
+  time_t time)
 {
list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
-  if (brw_bo_madvise(bo, I915_MADV_DONTNEED))
+  if (bo->free_time >= time) {
+ brw_bo_madvise(bo, I915_MADV_DONTNEED);
+ list_del(&bo->head);
+ bo_free(bo);
+  } else {
  break;
-
-  list_del(&bo->head);
-  bo_free(bo);
+  }
}
 }
 
@@ -319,8 +322,8 @@ retry:
 
   if (bo) {
  if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
+brw_bo_cache_purge_bucket(bufmgr, bucket, bo->free_time);
 bo_free(bo);
-brw_bo_cache_purge_bucket(bufmgr, bucket);
 return NULL;
  }
 
-- 
2.7.4



[Mesa-dev] [PATCH 1/1] main: fail texture_storage() call if the size is not okay

2018-04-27 Thread James Xiong
From: "Xiong, James" 

Signed-off-by: Xiong, James 
---
 src/mesa/main/texstorage.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c
index 44edba3..9cb8b90 100644
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -445,6 +445,7 @@ texture_storage(struct gl_context *ctx, GLuint dims,
 _mesa_error(ctx, GL_OUT_OF_MEMORY,
 "glTex%sStorage%uD(texture too large)",
 suffix, dims);
+return;
  }
   }
 
-- 
2.7.4



Re: [Mesa-dev] [PATCH 1/1] main: return 0 length when the queried program object's not linked

2018-04-27 Thread James Xiong
On Fri, 27 Apr 2018 12:32:09 +1000
Timothy Arceri  wrote:

> Reviewed-by: Timothy Arceri 
> 
> It would also be nice to have a piglit test for this case. I assume
> mesa probably crashes without this patch?
Thanks for the review. Yes there was a segmentation fault.
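
A minimal illustration of the case being fixed (not a piglit test,
just the API-level behaviour):

   GLuint prog = glCreateProgram();   /* created but never linked */
   GLint len = -1;
   glGetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &len);
   /* with the patch: len == 0 and no crash; before it, Mesa could
    * dereference the missing linked-program data and segfault */
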
> 
> On 27/04/18 11:39, James Xiong wrote:
> > From: "Xiong, James" 
> > 
> > Signed-off-by: Xiong, James 
> > ---
> >   src/mesa/main/shaderapi.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
> > index 44b18af..caa4254 100644
> > --- a/src/mesa/main/shaderapi.c
> > +++ b/src/mesa/main/shaderapi.c
> > @@ -837,7 +837,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
> >        *params = shProg->BinaryRetreivableHint;
> >        return;
> >     case GL_PROGRAM_BINARY_LENGTH:
> > -      if (ctx->Const.NumProgramBinaryFormats == 0) {
> > +      if (ctx->Const.NumProgramBinaryFormats == 0 || !shProg->data->LinkStatus) {
> >           *params = 0;
> >        } else {
> >           _mesa_get_program_binary_length(ctx, shProg, params);



[Mesa-dev] [PATCH 1/1] main: return 0 length when the queried program object's not linked

2018-04-26 Thread James Xiong
From: "Xiong, James" 

Signed-off-by: Xiong, James 
---
 src/mesa/main/shaderapi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 44b18af..caa4254 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -837,7 +837,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
   *params = shProg->BinaryRetreivableHint;
   return;
case GL_PROGRAM_BINARY_LENGTH:
-  if (ctx->Const.NumProgramBinaryFormats == 0) {
+  if (ctx->Const.NumProgramBinaryFormats == 0 || 
!shProg->data->LinkStatus) {
  *params = 0;
   } else {
  _mesa_get_program_binary_length(ctx, shProg, params);
-- 
2.7.4



Re: [Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space

2018-04-13 Thread James Xiong
On Fri, 13 Apr 2018 14:33:09 -0700
Kenneth Graunke  wrote:

> On Friday, April 13, 2018 2:08:40 PM PDT James Xiong wrote:
> > On Fri, 13 Apr 2018 13:51:02 -0700
> > Kenneth Graunke  wrote:
> >   
> > > On Friday, April 13, 2018 1:35:45 PM PDT Kenneth Graunke wrote:  
> > > > On Monday, April 9, 2018 4:06:16 PM PDT James Xiong wrote:
> > > > > From: "Xiong, James" 
> > > > > 
> > > > > On non-LLC platforms, we malloc shadow batch/state buffers
> > > > > of the same sizes as our batch/state buffers' GEM allocations.
> > > > > However the buffer allocator reuses similar-sized gem objects,
> > > > > it returns a buffer larger than we asked for in some cases
> > > > > and we end up with smaller shadow buffers. If we utilize the
> > > > > full-size of the over-allocated batch/state buffers, we may
> > > > > wind up accessing beyond the bounds of the shadow buffers and
> > > > > cause segmentation fault and/or memory corruption.
> > > > 
> > > > Oh, good catch!  We do indeed malloc too little if the bufmgr  
> > Thank you for taking time to review, Kenneth.  
> > > > 
> > > > > A few examples:
> > > > >  case           batch                   state
> > > > >                 request  bo    shadow   request  bo   shadow
> > > > > init 0          20K      20K   20K      16K      16K  16K
> > > > > grow_buffer 1   30K      32K   30K      24K      24K  24K
> > > > > grow_buffer 2   48K      48K   48K      36K      40K  36K
> > > > > grow_buffer 3   72K      80K   72K      60K      64K  60K
> > > > > grow_buffer 4   120K     128K  120K     -        -    -
> > > > > 
> > > > > batch #1, #3, #4; state #2 and #3 are problematic. We can
> > > > > change the order to allocate the bo first, then allocate the
> > > > > shadow buffer using the bo's size so that the shadow buffer
> > > > > have at least an equivalent size of the gem allocation.
> > > > > 
> > > > > Another problem: even though the state/batch buffer could
> > > > > grow, when checking if it runs out space, we always compare
> > > > > with the initial batch/state sizes. To utilize the entire
> > > > > buffers, change to compare with the actual sizes.
> > > > 
> > > > This is actually intentional.  Our goal is to flush batches
> > > > when the amount of commands or state reaches those thresholds.
> > > > Occasionally, we'll be in the middle of emitting a draw, and
> > > > unable to stop.  In that case, we grow the batch and keep
> > > > going.  But after that, we're beyond our original target, so we
> > > > flush next time.  We don't want to grow without bounds...it's
> > > > meant more for emergencies, or if we've badly estimated the
> > > > size of the draw call.  
> > I am not sure I get it. Let me give an example: the state buffer
> > gets grown once from 16K to 24K in brw_state_batch(), and the used
> > size becomes 20K. Then brw_require_statebuffer_space(1024) gets
> > called to ask for 1K of space; with the original logic, it compares
> > the used size with 16K and flushes the batch even though the state
> > buffer still has 4K of space available?  
> 
> Yes, the idea is to flush at around 16K of state.  If we happen to be
> in the middle of a draw and run out of space, we'll grow to 24K.
> Once it's over 16K, we flush as soon as we can.
> 
> We'd like to be fairly consistent on our batch size.  Running larger
> batches can lead to differences in performance, and large batches can
> lead to degradation in the interactivity of the system (especially on
> GPUs without preemption).
> 
> The hope is to grow once, at most.  If we check against the BO size,
> we might grow repeatedly, which would lead to really large batches and
> things would get out of hand.
I see, thanks for the explanation, Kenneth.
> 
> > > > 
> > > > I've sent a simpler patch which I think should hopefully fix
> > > > your bug: https://patchwork.freedesktop.org/patch/217107/
> > > 
> > > Lionel noticed that I botched that patch.  Here's an actual one:
> > > 
> > > https://patchwork.freedesktop.org/patch/217108/  
> > Yes it will fix the existing bug. However, the assumption here is
> > that the initial allocation size will NOT be rounded up, as it
> > happens to be a bucket size.
> > I am working on an optimization to improve memory usage (that's how
> > I found this bug), and this assumption is no longer true.
> > Essentially the bufmgr can return a buffer of the same or larger
> > size whether or not the request matches a bucket size. Anyway I
> > guess I can send the fix later along with the optimization
> > patches.  
> 
> Ah, that's a good point.  Your patch also tries to use the BO size
> for the initial malloc as well, which is a good idea...
So what would you prefer: should I change this patch and send it for
review, or do we take yours for now?


Re: [Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space

2018-04-13 Thread James Xiong
On Fri, 13 Apr 2018 13:51:02 -0700
Kenneth Graunke  wrote:

> On Friday, April 13, 2018 1:35:45 PM PDT Kenneth Graunke wrote:
> > On Monday, April 9, 2018 4:06:16 PM PDT James Xiong wrote:  
> > > From: "Xiong, James" 
> > > 
> > > On non-LLC platforms, we malloc shadow batch/state buffers
> > > of the same sizes as our batch/state buffers' GEM allocations.
> > > However the buffer allocator reuses similar-sized gem objects,
> > > it returns a buffer larger than we asked for in some cases
> > > and we end up with smaller shadow buffers. If we utilize the
> > > full-size of the over-allocated batch/state buffers, we may wind
> > > up accessing beyond the bounds of the shadow buffers and cause
> > > segmentation fault and/or memory corruption.  
> > 
> > Oh, good catch!  We do indeed malloc too little if the bufmgr
Thank you for taking time to review, Kenneth.
> >   
> > > A few examples:
> > >  case           batch                   state
> > >                 request  bo    shadow   request  bo   shadow
> > > init 0          20K      20K   20K      16K      16K  16K
> > > grow_buffer 1   30K      32K   30K      24K      24K  24K
> > > grow_buffer 2   48K      48K   48K      36K      40K  36K
> > > grow_buffer 3   72K      80K   72K      60K      64K  60K
> > > grow_buffer 4   120K     128K  120K     -        -    -
> > > 
> > > batch #1, #3, #4; state #2 and #3 are problematic. We can change
> > > the order to allocate the bo first, then allocate the shadow
> > > buffer using the bo's size so that the shadow buffer have at
> > > least an equivalent size of the gem allocation.
> > > 
> > > Another problem: even though the state/batch buffer could grow,
> > > when checking if it runs out space, we always compare with the
> > > initial batch/state sizes. To utilize the entire buffers, change
> > > to compare with the actual sizes.  
> > 
> > This is actually intentional.  Our goal is to flush batches when the
> > amount of commands or state reaches those thresholds.  Occasionally,
> > we'll be in the middle of emitting a draw, and unable to stop.  In
> > that case, we grow the batch and keep going.  But after that, we're
> > beyond our original target, so we flush next time.  We don't want
> > to grow without bounds...it's meant more for emergencies, or if
> > we've badly estimated the size of the draw call.
I am not sure I get it. Let me give an example: the state buffer
gets grown once from 16K to 24K in brw_state_batch(), and the used
size becomes 20K. Then brw_require_statebuffer_space(1024) gets called
to ask for 1K of space; with the original logic, it compares the used
size with 16K and flushes the batch even though the state buffer still
has 4K of space available?
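
Spelling the example out (a worked trace using the sizes above, not
output from the code):

   step                               state bo  used  check against STATE_SZ (16K)
   initial allocation                 16K       0     -
   grow in brw_state_batch()          24K       20K   -
   brw_require_statebuffer_space(1K)  24K       20K   20K + 1K > 16K -> flush
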
> > 
> > I've sent a simpler patch which I think should hopefully fix your
> > bug: https://patchwork.freedesktop.org/patch/217107/  
> 
> Lionel noticed that I botched that patch.  Here's an actual one:
> 
> https://patchwork.freedesktop.org/patch/217108/
Yes it will fix the existing bug. However, the assumption here is
that the initial allocation size will NOT be rounded up, as it happens
to be a bucket size.
I am working on an optimization to improve memory usage (that's how I
found this bug), and this assumption is no longer true. Essentially the
bufmgr can return a buffer of the same or larger size whether or not
the request matches a bucket size. Anyway I guess I can send the fix
later along with the optimization patches.



[Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space

2018-04-09 Thread James Xiong
From: "Xiong, James" 

On non-LLC platforms, we malloc shadow batch/state buffers of the same
sizes as our batch/state buffers' GEM allocations. However, the buffer
allocator reuses similar-sized GEM objects, so in some cases it returns
a buffer larger than we asked for, and we end up with shadow buffers
that are too small. If we utilize the full size of the over-allocated
batch/state buffers, we may wind up accessing beyond the bounds of the
shadow buffers, causing a segmentation fault and/or memory corruption.

A few examples:
 case            batch                   state
                 request  bo    shadow   request  bo    shadow
init          0  20K      20K   20K      16K      16K   16K
grow_buffer   1  30K      32K   30K      24K      24K   24K
grow_buffer   2  48K      48K   48K      36K      40K   36K
grow_buffer   3  72K      80K   72K      60K      64K   60K
grow_buffer   4  120K     128K  120K     -        -     -

Batch cases #1, #3, and #4 and state cases #2 and #3 are
problematic. We can change the order: allocate the bo first,
then allocate the shadow buffer using the bo's size, so that
the shadow buffer has at least the size of the GEM allocation.

Another problem: even though the state/batch buffer can grow,
when checking whether it has run out of space, we always compare
against the initial batch/state sizes. To utilize the entire
buffers, compare against the actual sizes instead.
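
A reduced sketch of the two changes (struct bo and struct buffer here
are illustrative stand-ins, not the driver's definitions; the real
code is in the diff below):

   #include <stdlib.h>

   struct bo     { size_t size; };  /* size the bufmgr actually returned */
   struct buffer { struct bo *bo; void *map; size_t shadow_size; };

   /* 1) size (or grow) the shadow from bo->size rather than from the
    *    original request, so it is never smaller than the bo */
   static void ensure_shadow(struct buffer *buf)
   {
      if (buf->shadow_size < buf->bo->size) {
         free(buf->map);
         buf->map = malloc(buf->bo->size);
         buf->shadow_size = buf->bo->size;
      }
   }

   /* 2) check remaining space against the actual bo size, not against
    *    the initial BATCH_SZ/STATE_SZ */
   static int out_of_space(const struct buffer *buf, size_t used, size_t sz)
   {
      return used + sz >= buf->bo->size;
   }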

Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_context.h   |  1 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 49 +--
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index f049d08..39aae08 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -477,6 +477,7 @@ struct brw_growing_bo {
struct brw_bo *partial_bo;
uint32_t *partial_bo_map;
unsigned partial_bytes;
+   unsigned shadow_size;
 };
 
 struct intel_batchbuffer {
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 7286140..facbbf8 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -107,12 +107,6 @@ intel_batchbuffer_init(struct brw_context *brw)
 
batch->use_shadow_copy = !devinfo->has_llc;
 
-   if (batch->use_shadow_copy) {
-  batch->batch.map = malloc(BATCH_SZ);
-  batch->map_next = batch->batch.map;
-  batch->state.map = malloc(STATE_SZ);
-   }
-
init_reloc_list(&batch->batch_relocs, 250);
init_reloc_list(&batch->state_relocs, 250);
 
@@ -212,10 +206,25 @@ intel_batchbuffer_reset(struct brw_context *brw)
batch->last_bo = batch->batch.bo;
 
recreate_growing_buffer(brw, &batch->batch, "batchbuffer", BATCH_SZ);
-   batch->map_next = batch->batch.map;
 
recreate_growing_buffer(brw, &batch->state, "statebuffer", STATE_SZ);
 
+   if (batch->use_shadow_copy) {
+  if (batch->batch.shadow_size < batch->batch.bo->size) {
+ free(batch->batch.map);
+ batch->batch.map = malloc(batch->batch.bo->size);
+ batch->batch.shadow_size = batch->batch.bo->size;
+  }
+
+  if (batch->state.shadow_size < batch->state.bo->size) {
+ free(batch->state.map);
+ batch->state.map = malloc(batch->state.bo->size);
+ batch->state.shadow_size = batch->state.bo->size;
+  }
+   }
+
+   batch->map_next = batch->batch.map;
+
/* Avoid making 0 a valid state offset - otherwise the decoder will try
 * and decode data when we use offset 0 as a null pointer.
 */
@@ -361,7 +370,8 @@ grow_buffer(struct brw_context *brw,
* breaking existing pointers the caller may still be using.  Just
* malloc a new copy and memcpy it like the normal BO path.
*/
-  grow->map = malloc(new_size);
+  grow->map = malloc(new_bo->size);
+  grow->shadow_size = new_bo->size;
} else {
   grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE);
}
@@ -467,15 +477,17 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
}
 
const unsigned batch_used = USED_BATCH(*batch) * 4;
-   if (batch_used + sz >= BATCH_SZ && !batch->no_wrap) {
-  intel_batchbuffer_flush(brw);
-   } else if (batch_used + sz >= batch->batch.bo->size) {
-  const unsigned new_size =
- MIN2(batch->batch.bo->size + batch->batch.bo->size / 2,
-  MAX_BATCH_SIZE);
-  grow_buffer(brw, &batch->batch, batch_used, new_size);
-  batch->map_next = (void *) batch->batch.map + batch_used;
-  assert(batch_used + sz < batch->batch.bo->size);
+   if (batch_used + sz >= batch->batch.bo->size) {
+  if (!batch->no_wrap) {
+ intel_batchbuffer_flush(brw);
+  } else {
+ const unsigned new_size =
+MIN2(batch->batch.bo->size + batch->batch.bo->size / 2,
+ MAX_BATCH_SIZE);
+ grow_buffer(brw, &batch->batch, batch_used, new_size);

[Mesa-dev] [PATCH 1/1] i965: return the fourcc saved in __DRIimage when possible

2018-04-05 Thread James Xiong
From: "Xiong, James" 

When creating an image from a texture, the image's dri_format is
set to the first plane's format and then used to look up the
fourcc. E.g. for a FOURCC_NV12 texture, the dri_format is set to
__DRI_IMAGE_FORMAT_R8, so we end up with the wrong entry in
intel_lookup_fourcc():
   { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_COMPONENTS_R, 1,
     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } },
instead of the correct one:
   { __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },
As a result, the wrong fourcc __DRI_IMAGE_FOURCC_R8 was returned.

To fix this bug, the image inherits the texture's planar_format,
which carries the original fourcc; upon querying, if planar_format
is set, return the saved fourcc, otherwise fall back to the old way.
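
For reference, a minimal sketch of the exporter-side sequence that hits
this path; it assumes an EGL display/context pair is current and that
tex already holds NV12 data (names and error handling are illustrative
only, and the extension entry points are resolved via eglGetProcAddress):

   #include <stdint.h>
   #include <EGL/egl.h>
   #include <EGL/eglext.h>

   static int query_fourcc(EGLDisplay dpy, EGLContext ctx, unsigned tex)
   {
      PFNEGLCREATEIMAGEKHRPROC create_image =
         (PFNEGLCREATEIMAGEKHRPROC) eglGetProcAddress("eglCreateImageKHR");
      PFNEGLEXPORTDMABUFIMAGEQUERYMESAPROC export_query =
         (PFNEGLEXPORTDMABUFIMAGEQUERYMESAPROC)
            eglGetProcAddress("eglExportDMABUFImageQueryMESA");

      /* create an image from the NV12 texture ... */
      EGLImageKHR img = create_image(dpy, ctx, EGL_GL_TEXTURE_2D_KHR,
                                     (EGLClientBuffer) (uintptr_t) tex, NULL);

      /* ... then query its fourcc; the modifiers pointer may be NULL */
      int fourcc = 0, nplanes = 0;
      export_query(dpy, img, &fourcc, &nplanes, NULL);
      /* without this fix: fourcc == FOURCC_R8   and nplanes == 1
       * with this fix:    fourcc == FOURCC_NV12 and nplanes == 2 */
      return fourcc;
   }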

v3: add a bug description and "cc mesa-stable" tag (Jason)
  remove redundant null pointer check (Tapani)
  squash 2 patches into one (James)
v2: fall back to intel_lookup_fourcc() when planar_format is NULL
  (Dongwon & Matt Roper)

Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index dcb98da..29cb7ad 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -388,10 +388,16 @@ intel_image_format_lookup(int fourcc)
return NULL;
 }
 
-static boolean intel_lookup_fourcc(int dri_format, int *fourcc)
+static boolean
+intel_image_get_fourcc(__DRIimage *image, int *fourcc)
 {
+   if (image->planar_format) {
+  *fourcc = image->planar_format->fourcc;
+  return true;
+   }
+
for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
-  if (intel_image_formats[i].planes[0].dri_format == dri_format) {
+  if (intel_image_formats[i].planes[0].dri_format == image->dri_format) {
  *fourcc = intel_image_formats[i].fourcc;
  return true;
   }
@@ -578,6 +584,7 @@ intel_create_image_from_texture(__DRIcontext *context, int target,
intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset);
image->dri_format = driGLFormatToImageFormat(image->format);
image->has_depthstencil = iobj->mt->stencil_mt? true : false;
+   image->planar_format = iobj->planar_format;
if (image->dri_format == MESA_FORMAT_NONE) {
   *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
   free(image);
@@ -869,7 +876,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
case __DRI_IMAGE_ATTRIB_FD:
   return !brw_bo_gem_export_to_prime(image->bo, value);
case __DRI_IMAGE_ATTRIB_FOURCC:
-  return intel_lookup_fourcc(image->dri_format, value);
+  return intel_image_get_fourcc(image, value);
case __DRI_IMAGE_ATTRIB_NUM_PLANES:
   if (isl_drm_modifier_has_aux(image->modifier)) {
  assert(!image->planar_format || image->planar_format->nplanes == 1);
-- 
2.7.4



Re: [Mesa-dev] [PATCH 2/2] i965: return the fourcc saved in __DRIimage

2018-04-05 Thread James Xiong
On Thu, 5 Apr 2018 20:57:46 +0300
Tapani Pälli  wrote:

> On 05.04.2018 18:43, James Xiong wrote:
> > On Thu, 5 Apr 2018 14:30:02 +0300
> > Tapani Pälli  wrote:
> >   
> >> On 04/05/2018 02:51 AM, James Xiong wrote:  
> >>> From: "Xiong, James" 
> >>>
> >>> The planar_format in __DRIimage contains the original fourcc
> >>> used to create the image; if it's set, return the saved fourcc
> >>> directly, otherwise fall back to the old way.
> >>>
> >>> Also, we should validate the input parameter "value" first, as
> >>> it might be NULL according to the spec.
> >>>
> >>> v2: fall back to intel_lookup_fourcc() when planar_format is NULL
> >>> (by Dongwon & Matt Roper)
> >>>
> >>> Signed-off-by: Xiong, James 
> >>> ---
> >>>src/mesa/drivers/dri/i965/intel_screen.c | 15 ---
> >>>1 file changed, 12 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
> >>> b/src/mesa/drivers/dri/i965/intel_screen.c
> >>> index 7df8bc4..aeecef3 100644
> >>> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> >>> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> >>> @@ -388,10 +388,16 @@ intel_image_format_lookup(int fourcc)
> >>>   return NULL;
> >>>}
> >>>
> >>> -static boolean intel_lookup_fourcc(int dri_format, int *fourcc)
> >>> +static boolean
> >>> +intel_image_get_fourcc(__DRIimage *image, int *fourcc)
> >>>{
> >>> +   if (image->planar_format) {
> >>> +  *fourcc = image->planar_format->fourcc;
> >>> +  return true;
> >>> +   }
> >>> +
> >>>   for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
> >>> -  if (intel_image_formats[i].planes[0].dri_format == dri_format) {
> >>> +  if (intel_image_formats[i].planes[0].dri_format == image->dri_format) {
> >>>   *fourcc = intel_image_formats[i].fourcc;
> >>>   return true;
> >>>  }
> >>> @@ -844,6 +850,9 @@ intel_create_image_with_modifiers(__DRIscreen *dri_screen,
> >>>  static GLboolean
> >>>intel_query_image(__DRIimage *image, int attrib, int *value)
> >>>{
> >>> +   if (value == NULL)
> >>> +  return false;
> >>> +  
> >>
> >> I would remove this check, we've been fine many years without it.  
The function spec does say: "<fds>, <strides> and <offsets>
may be NULL, in which case no value is retrieved."
It's better to stick to it and have an extra check than a
segmentation fault, what do you say?
> 
> Function modified here 'intel_query_image' is part of DRI interface 
> (dri_interface.h) with no such spec, it is used from many existing 
> places and none of them seem to be passing NULL.
> 
> Now that I know you are using eglExportDMABUFImageMESA, I can also
> see that dri2_export_dma_buf_image_mesa implementation does not call
> this API when 'fds', 'strides' or 'offsets' is NULL, it checks this
> in the implementation before calling.
> 
You are right, it's already checked at the upper level. I will
remove it.
> 
> >>  
> >>>   switch (attrib) {
> >>>   case __DRI_IMAGE_ATTRIB_STRIDE:
> >>>  *value = image->pitch;
> >>> @@ -870,7 +879,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
> >>>   case __DRI_IMAGE_ATTRIB_FD:
> >>>  return !brw_bo_gem_export_to_prime(image->bo, value);
> >>>   case __DRI_IMAGE_ATTRIB_FOURCC:
> >>> -  return intel_lookup_fourcc(image->dri_format, value);
> >>> +  return intel_image_get_fourcc(image, value);
> >>>   case __DRI_IMAGE_ATTRIB_NUM_PLANES:
> >>>  if (isl_drm_modifier_has_aux(image->modifier)) {
> >>>     assert(!image->planar_format || image->planar_format->nplanes == 1);
> >   
> 
> // Tapani



Re: [Mesa-dev] [PATCH 1/2] i965: init image's planar_format in intel_create_image_from_texture

2018-04-05 Thread James Xiong
On Thu, 5 Apr 2018 08:56:54 -0700
Jason Ekstrand  wrote:

> On Thu, Apr 5, 2018 at 8:45 AM, James Xiong 
> wrote:
> 
> > On Thu, 5 Apr 2018 08:24:27 -0700
> > Jason Ekstrand  wrote:
> >  
> > > Does this fix a bug?  If so, what?  
> > Jason, yes. I am sorry for the confusion; please see my earlier
> > reply to Tapani.
> >  
> 
> If it fixes a bug then the commit message should include a
> description of the bug fixed.  If the bug is a regression, it should
> have a "Fixes: SHA1" tag indicating where the regression originated
> and if it is a bug that has been around for a very long time, it
> should have a "Cc: mesa-sta...@lists.freedesktop.org" tag.  If the
> bug is filed in bugzilla, it should have a "Bugzilla: " tag.
> 
> --Jason
Thanks for the tip, Jason. I will make the corresponding changes in the
next version.
> 
> 
> 
> > >
> > > On April 4, 2018 16:57:21 James Xiong 
> > > wrote: 
> > > > From: "Xiong, James" 
> > > >
> > > > When creating an image from a texture, initialize the image's
> > > > planar_format with the texture's.
> > > >
> > > > Signed-off-by: Xiong, James 
> > > > ---
> > > > src/mesa/drivers/dri/i965/intel_screen.c | 1 +
> > > > 1 file changed, 1 insertion(+)
> > > >
> > > > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
> > > > b/src/mesa/drivers/dri/i965/intel_screen.c
> > > > index dcb98da..7df8bc4 100644
> > > > --- a/src/mesa/drivers/dri/i965/intel_screen.c
> > > > +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> > > > @@ -578,6 +578,7 @@ intel_create_image_from_texture(__DRIcontext *context, int target,
> > > >    intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset);
> > > >    image->dri_format = driGLFormatToImageFormat(image->format);
> > > >    image->has_depthstencil = iobj->mt->stencil_mt? true : false;
> > > > +   image->planar_format = iobj->planar_format;
> > > >if (image->dri_format == MESA_FORMAT_NONE) {
> > > >   *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
> > > >   free(image);
> > > > --
> > > > 2.7.4
> > > >
> > >
> > >
> > >  
> >
> >  



Re: [Mesa-dev] [PATCH 1/2] i965: init image's planar_format in intel_create_image_from_texture

2018-04-05 Thread James Xiong
On Thu, 5 Apr 2018 08:24:27 -0700
Jason Ekstrand  wrote:

> Does this fix a bug?  If so, what?
Jason, yes. I am sorry for the confusion; please see my earlier reply
to Tapani.
> 
> On April 4, 2018 16:57:21 James Xiong  wrote:
> 
> > From: "Xiong, James" 
> >
> > When creating an image from a texture, initialize the image's
> > planar_format with the texture's.
> >
> > Signed-off-by: Xiong, James 
> > ---
> > src/mesa/drivers/dri/i965/intel_screen.c | 1 +
> > 1 file changed, 1 insertion(+)
> >
> > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> > b/src/mesa/drivers/dri/i965/intel_screen.c
> > index dcb98da..7df8bc4 100644
> > --- a/src/mesa/drivers/dri/i965/intel_screen.c
> > +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> > @@ -578,6 +578,7 @@ intel_create_image_from_texture(__DRIcontext *context, int target,
> >    intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset);
> >    image->dri_format = driGLFormatToImageFormat(image->format);
> >    image->has_depthstencil = iobj->mt->stencil_mt? true : false;
> > +   image->planar_format = iobj->planar_format;
> >if (image->dri_format == MESA_FORMAT_NONE) {
> >   *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
> >   free(image);
> > --
> > 2.7.4
> >
> 
> 
> 



Re: [Mesa-dev] [PATCH 2/2] i965: return the fourcc saved in __DRIimage

2018-04-05 Thread James Xiong
On Thu, 5 Apr 2018 14:30:02 +0300
Tapani Pälli  wrote:

> On 04/05/2018 02:51 AM, James Xiong wrote:
> > From: "Xiong, James" 
> > 
> > The planar_format in __DRIimage contains the original fourcc
> > used to create the image; if it's set, return the saved fourcc
> > directly, otherwise fall back to the old way.
> > 
> > Also, we should validate the input parameter "value" first, as
> > it might be NULL according to the spec.
> > 
> > v2: fall back to intel_lookup_fourcc() when planar_format is NULL
> >(by Dongwon & Matt Roper)
> > 
> > Signed-off-by: Xiong, James 
> > ---
> >   src/mesa/drivers/dri/i965/intel_screen.c | 15 ---
> >   1 file changed, 12 insertions(+), 3 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
> > b/src/mesa/drivers/dri/i965/intel_screen.c
> > index 7df8bc4..aeecef3 100644
> > --- a/src/mesa/drivers/dri/i965/intel_screen.c
> > +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> > @@ -388,10 +388,16 @@ intel_image_format_lookup(int fourcc)
> >  return NULL;
> >   }
> >   
> > -static boolean intel_lookup_fourcc(int dri_format, int *fourcc)
> > +static boolean
> > +intel_image_get_fourcc(__DRIimage *image, int *fourcc)
> >   {
> > +   if (image->planar_format) {
> > +  *fourcc = image->planar_format->fourcc;
> > +  return true;
> > +   }
> > +
> >  for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
> > -  if (intel_image_formats[i].planes[0].dri_format == dri_format) {
> > +  if (intel_image_formats[i].planes[0].dri_format == image->dri_format) {
> >    *fourcc = intel_image_formats[i].fourcc;
> >    return true;
> >    }
> > @@ -844,6 +850,9 @@ intel_create_image_with_modifiers(__DRIscreen *dri_screen,
> >  static GLboolean
> >   intel_query_image(__DRIimage *image, int attrib, int *value)
> >   {
> > +   if (value == NULL)
> > +  return false;
> > +  
> 
> I would remove this check, we've been fine many years without it.
The function spec does say: "<fds>, <strides> and <offsets> may
be NULL, in which case no value is retrieved."
It's better to stick to it and have an extra check than a
segmentation fault, what do you say?
> 
> >  switch (attrib) {
> >  case __DRI_IMAGE_ATTRIB_STRIDE:
> > *value = image->pitch;
> > @@ -870,7 +879,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
> >  case __DRI_IMAGE_ATTRIB_FD:
> > return !brw_bo_gem_export_to_prime(image->bo, value);
> >  case __DRI_IMAGE_ATTRIB_FOURCC:
> > -  return intel_lookup_fourcc(image->dri_format, value);
> > +  return intel_image_get_fourcc(image, value);
> >  case __DRI_IMAGE_ATTRIB_NUM_PLANES:
> > if (isl_drm_modifier_has_aux(image->modifier)) {
> >    assert(!image->planar_format || image->planar_format->nplanes == 1);



Re: [Mesa-dev] [PATCH 0/2] wrong fourcc was returned for imported images

2018-04-05 Thread James Xiong
On Thu, 5 Apr 2018 15:05:26 +0300
Tapani Pälli  wrote:

> Hi;
> 
> On 04/05/2018 02:51 AM, James Xiong wrote:
> > From: "Xiong, James" 
> > 
> > The importer creates an image out of the imported FOURCC_NV12
> > texture; the image's dri_format is set to R8 (the same as the
> > first plane's format), so when it queries the image's fourcc,
> > mesa goes through the intel_image_formats table and returns
> > FOURCC_R8.
> 
> Could you explain the use case a bit, I'm not sure I understand how
> this happens. First someone imports dmabufs, creating EGLImage. Then
> a texture is associated with that (glEGLImageTargetTexture2DOES?), is
> it so that then we create yet another EGLImage from that texture or
> am I completely lost here? :)
> 
Sorry for the confusion. 

What actually happened was that the exporter 1) calls
eglCreateImageKHR() to create an image from an NV12 texture, and
2) calls eglExportDMABUFImageQueryMESA(), but fourcc_r8 is returned
instead of fourcc_nv12.

Essentially, image->dri_format is the first plane's format, i.e.
__DRI_IMAGE_FORMAT_R8 in our case; when querying the fourcc,
intel_lookup_fourcc() gets the following entry from the
intel_image_formats table:
   { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_COMPONENTS_R, 1,
     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } },
instead of the correct one:
   { __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },

To fix this, the image should inherit the texture's planar_format,
which contains the original fourcc; when the user application
queries the fourcc, we can return the saved fourcc if planar_format
is set, and otherwise fall back to the original way of looking up
the fourcc.

> > The solution is to 1) set the image's planar_format from the
> > texture's in intel_create_image_from_texture(), and 2) when
> > queried, return the fourcc saved in planar_format.
> > 
> > Xiong, James (2):
> >i965: init image's planar_format in
> > intel_create_image_from_texture i965: return the fourcc saved in
> > __DRIimage
> > 
> >   src/mesa/drivers/dri/i965/intel_screen.c | 16 +---
> >   1 file changed, 13 insertions(+), 3 deletions(-)
> >   



[Mesa-dev] [PATCH 1/2] i965: init image's planar_format in intel_create_image_from_texture

2018-04-04 Thread James Xiong
From: "Xiong, James" 

When creating an image from a texture, initialize the image's planar_format
with the texture's.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index dcb98da..7df8bc4 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -578,6 +578,7 @@ intel_create_image_from_texture(__DRIcontext *context, int target,
intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset);
image->dri_format = driGLFormatToImageFormat(image->format);
image->has_depthstencil = iobj->mt->stencil_mt? true : false;
+   image->planar_format = iobj->planar_format;
if (image->dri_format == MESA_FORMAT_NONE) {
   *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
   free(image);
-- 
2.7.4



[Mesa-dev] [PATCH 0/2] wrong fourcc was returned for imported images

2018-04-04 Thread James Xiong
From: "Xiong, James" 

The importer creates an image out of the imported FOURCC_NV12 texture;
the image's dri_format is set to R8 (the same as the first plane's
format), so when it queries the image's fourcc, mesa goes through the
intel_image_formats table and returns FOURCC_R8.

The solution is to 1) set the image's planar_format from the texture's
in intel_create_image_from_texture(), and 2) when queried, return the
fourcc saved in planar_format.

Xiong, James (2):
  i965: init image's planar_format in intel_create_image_from_texture
  i965: return the fourcc saved in __DRIimage

 src/mesa/drivers/dri/i965/intel_screen.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

-- 
2.7.4



[Mesa-dev] [PATCH 2/2] i965: return the fourcc saved in __DRIimage

2018-04-04 Thread James Xiong
From: "Xiong, James" 

The planar_format in __DRIimage contains the original fourcc
used to create the image; if it's set, return the saved fourcc
directly, otherwise fall back to the old way.

Also, we should validate the input parameter "value" first, as it
might be NULL according to the spec.

v2: fall back to intel_lookup_fourcc() when planar_format is NULL
  (by Dongwon & Matt Roper)

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 7df8bc4..aeecef3 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -388,10 +388,16 @@ intel_image_format_lookup(int fourcc)
return NULL;
 }
 
-static boolean intel_lookup_fourcc(int dri_format, int *fourcc)
+static boolean
+intel_image_get_fourcc(__DRIimage *image, int *fourcc)
 {
+   if (image->planar_format) {
+  *fourcc = image->planar_format->fourcc;
+  return true;
+   }
+
for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
-  if (intel_image_formats[i].planes[0].dri_format == dri_format) {
+  if (intel_image_formats[i].planes[0].dri_format == image->dri_format) {
  *fourcc = intel_image_formats[i].fourcc;
  return true;
   }
@@ -844,6 +850,9 @@ intel_create_image_with_modifiers(__DRIscreen *dri_screen,
 static GLboolean
 intel_query_image(__DRIimage *image, int attrib, int *value)
 {
+   if (value == NULL)
+  return false;
+
switch (attrib) {
case __DRI_IMAGE_ATTRIB_STRIDE:
   *value = image->pitch;
@@ -870,7 +879,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
case __DRI_IMAGE_ATTRIB_FD:
   return !brw_bo_gem_export_to_prime(image->bo, value);
case __DRI_IMAGE_ATTRIB_FOURCC:
-  return intel_lookup_fourcc(image->dri_format, value);
+  return intel_image_get_fourcc(image, value);
case __DRI_IMAGE_ATTRIB_NUM_PLANES:
   if (isl_drm_modifier_has_aux(image->modifier)) {
  assert(!image->planar_format || image->planar_format->nplanes == 1);
-- 
2.7.4
