[Mesa-dev] [PATCH 1/1] anv/ehl: 36-bit ppgtt support
From: "Xiong, James" The vma high heap's capacity and maximum address were pre-defined based on 48-bit ppgtt support, so the buffers allocated from the vma high heap had vma addresses that were invalid on the ehl platform, which only supports 36-bit ppgtt. As a result, the KMD rejected all batchbuffers submitted by Vulkan. This patch: 1) initializes the vma high heap by retrieving the gtt capacity from the KMD and calculating the size and max address on the fly; 2) enables softpin only when full ppgtt is enabled. V2: change commit messages and comments to reflect the changes [Bob, Jason] remove define HIGH_HEAP_SIZE [Bob] make sure there's enough space to enable softpin [Jason] Signed-off-by: Xiong, James --- src/intel/vulkan/anv_device.c | 30 +++--- src/intel/vulkan/anv_private.h | 7 --- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 88b34c4..c3eff1c 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -434,7 +434,12 @@ anv_physical_device_init(struct anv_physical_device *device, anv_gem_supports_syncobj_wait(fd); device->has_context_priority = anv_gem_has_context_priority(fd); + /* +* make sure there is enough VA space (i.e. 32+bit support) and full ppgtt +* is enabled. +*/ device->use_softpin = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) + && (anv_gem_get_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) > 1) && device->supports_48bit_addresses; device->has_context_isolation = @@ -1981,14 +1986,25 @@ VkResult anv_CreateDevice( device->vma_lo_available = physical_device->memory.heaps[physical_device->memory.heap_count - 1].size; - /* Leave the last 4GiB out of the high vma range, so that no state base - * address + size can overflow 48 bits. For more information see the - * comment about Wa32bitGeneralStateOffset in anv_allocator.c - */ - util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS, - HIGH_HEAP_SIZE); device->vma_hi_available = physical_device->memory.heap_count == 1 ? 0 : physical_device->memory.heaps[0].size; + + /* Retrieve the GTT's capacity and leave the last 4GiB out of the high vma + * range, so that no state base address + size can overflow the vma range. For + * more information see the comment about Wa32bitGeneralStateOffset in + * anv_allocator.c + */ + uint64_t size = 0; + anv_gem_get_context_param(device->fd, 0, I915_CONTEXT_PARAM_GTT_SIZE, +&size); + if(size > HIGH_HEAP_MIN_ADDRESS + (1ull<<32)) { + size -= HIGH_HEAP_MIN_ADDRESS + (1ull<<32); + device->vma_hi_max_addr = HIGH_HEAP_MIN_ADDRESS + size - 1; + } else { + size = device->vma_hi_max_addr = 0; + } + + util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS, size); } /* As per spec, the driver implementation may deny requests to acquire @@ -2456,7 +2472,7 @@ anv_vma_free(struct anv_device *device, struct anv_bo *bo) device->vma_lo_available += bo->size; } else { assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS && - addr_48b <= HIGH_HEAP_MAX_ADDRESS); + addr_48b <= device->vma_hi_max_addr); util_vma_heap_free(&device->vma_hi, addr_48b, bo->size); device->vma_hi_available += bo->size; } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 1664918..ef9b012 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -109,6 +109,9 @@ struct gen_l3_config; * heap. Various hardware units will read past the end of an object for * various reasons. This healthy margin prevents reads from wrapping around * 48-bit addresses.
+ * + * (4) the high vma heap size and max address are calculated based on the + * gtt capacity retrieved from KMD. */ #define LOW_HEAP_MIN_ADDRESS 0x1000ULL /* 4 KiB */ #define LOW_HEAP_MAX_ADDRESS 0xbfffULL @@ -121,12 +124,9 @@ struct gen_l3_config; #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x00018000ULL /* 6 GiB */ #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffULL #define HIGH_HEAP_MIN_ADDRESS 0x0001c000ULL /* 7 GiB */ -#define HIGH_HEAP_MAX_ADDRESS 0xfffeULL #define LOW_HEAP_SIZE \ (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1) -#define HIGH_HEAP_SIZE \ - (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1) #define DYNAMIC_STATE_POOL_SIZE \ (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1) #define BINDING_TABLE_POOL_SIZE \ @@ -1093,6 +1093,7 @@ struct anv_device { struct util_vma_heapvma_hi; uint64_tvma_lo_available; uint64_t
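A worked example, for illustration only and not part of the patch: on a 36-bit ppgtt platform such as ehl, and taking HIGH_HEAP_MIN_ADDRESS to be 7 GiB as the header comment states, the new code computes

   gtt_size        = 1ull << 36 = 64 GiB              /* reported by the KMD */
   size            = gtt_size - (HIGH_HEAP_MIN_ADDRESS + (1ull << 32))
                   = 64 GiB - (7 GiB + 4 GiB) = 53 GiB
   vma_hi_max_addr = HIGH_HEAP_MIN_ADDRESS + size - 1 = 60 GiB - 1

so the high heap now ends 4 GiB below the top of the 36-bit address space (preserving the Wa32bitGeneralStateOffset margin), whereas the old 48-bit constants produced addresses the ehl KMD rejected.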
[Mesa-dev] [PATCH 1/1] iris: set default tiling mode to X-tiled
From: "Xiong, James" UMDs such as modesetting can use the legacy ADDFB ioctl, which doesn't accept Y-tiled buffers; this patch sets the default tiling mode to X-tiled for backward compatibility. Signed-off-by: Xiong, James --- src/gallium/drivers/iris/iris_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c index d0a473f..56c9590 100644 --- a/src/gallium/drivers/iris/iris_resource.c +++ b/src/gallium/drivers/iris/iris_resource.c @@ -558,7 +558,8 @@ iris_resource_create_with_modifiers(struct pipe_screen *pscreen, uint64_t modifier = select_best_modifier(devinfo, modifiers, modifiers_count); - isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK; + /* Historically, X-tiled was the default */ + isl_tiling_flags_t tiling_flags = ISL_TILING_X_BIT; if (modifier != DRM_FORMAT_MOD_INVALID) { res->mod_info = isl_drm_modifier_get_info(modifier); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/4] i965/drm: Searching for a cached buffer for reuse
On Fri, 11 May 2018 16:57:22 +0100 Chris Wilson wrote: > Quoting James Xiong (2018-05-11 16:35:04) > > On Thu, 10 May 2018 13:56:12 -0700 > > Kenneth Graunke wrote: > > > > > On Friday, May 4, 2018 5:56:04 PM PDT James Xiong wrote: > > > > From: "Xiong, James" > > > > > > > > Now that a bucket contains cached buffers with different sizes, > > > > go through its list and search for a cached buffer with enough > > > > size. > > > > > > > > Signed-off-by: Xiong, James <...> > > > > pos, member); \ > > > > > > Hi James, > > > > > > I don't particularly like the idea of introducing linear linked > > > list walks to find a buffer. It's a really nice property to have > > > every buffer in the bucket be suitable and be able to grab one > > > immediately. It sounds like purging things more often in patch 4 > > > reduces the cost of this new list walk, but...it'd be nice to not > > > add it in the first place. > > Yes, the purge, as well as the cleanup routine which kicks in from > > time > > It should not be time to time, it should be whenever the cache is > older than a 1s (use a timer thread if you are concerned) as it being > checked everytime something is put into the cache (and so before the > lists grow). That's exactly how it works: the cleanup gets called before a buffer is about to be put in the bucket, and there is also a check whether an earlier cleanup has been performed within the last 1s, BTW. Sorry for the misleading wording. > If it is overallocating such that you are running out of > memory and not reusing buffers, the cache is barely functioning. Now > that's the information you want to present, where/when/why/how the > cache is not working. What is being put into the cache that is never > reused? -Chris I tried to clean up cached buffers more aggressively, every single time a buffer is unreferenced, and measured the memory usage; it didn't help much. I believe that cached-but-never-reused buffers might contribute to the overhead too, but most of it comes from the bucket size roundup. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
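A rough sketch of the behaviour described above: the cleanup runs when a buffer is about to be returned to its bucket, but at most once per second, and only frees entries whose free time is more than a second old. The function and field names (e.g. last_cleanup_time) are illustrative, not necessarily the ones in brw_bufmgr.c:

   static void
   cleanup_bo_cache(struct brw_bufmgr *bufmgr, time_t now)
   {
      /* An earlier cleanup ran within the last second? Then skip. */
      if (now - bufmgr->last_cleanup_time < 1)
         return;
      bufmgr->last_cleanup_time = now;

      for (int i = 0; i < bufmgr->num_buckets; i++) {
         struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
         list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
            /* The head holds the oldest entries; stop at the first one
             * freed less than a second ago. */
            if (now - bo->free_time < 1)
               break;
            list_del(&bo->head);
            bo_free(bo);
         }
      }
   }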
Re: [Mesa-dev] [PATCH 3/4] i965/drm: Searching for a cached buffer for reuse
On Thu, 10 May 2018 13:56:12 -0700 Kenneth Graunke wrote: > On Friday, May 4, 2018 5:56:04 PM PDT James Xiong wrote: > > From: "Xiong, James" > > > > Now that a bucket contains cached buffers with different sizes, go > > through its list and search for a cached buffer with enough size. > > > > Signed-off-by: Xiong, James > > --- > > src/mesa/drivers/dri/i965/brw_bufmgr.c | 21 +++-- > > src/util/list.h| 5 + > > 2 files changed, 20 insertions(+), 6 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > > b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 6a9b005..5235aa6 > > 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > > @@ -281,7 +281,7 @@ cached_bo_for_size(struct brw_bufmgr *bufmgr, > > assert(!(busy && zeroed)); > > > > if(bucket != NULL && !list_empty(&bucket->head)) { > > - struct brw_bo *bo; > > + struct brw_bo *bo, *temp_bo; > > retry: > >bo = NULL; > > > > @@ -292,8 +292,13 @@ retry: > >* asked us to zero the buffer, we don't want this > >* because we are going to mmap it. > >*/ > > - bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head); > > - list_del(&bo->head); > > + LIST_FOR_EACH_ENTRY_REV(temp_bo, &bucket->head, head) { > > +if (temp_bo->size >= size) { > > + bo = temp_bo; > > + list_del(&bo->head); > > + break; > > +} > > + } > >} else { > > /* For non-render-target BOs (where we're probably > >* going to map it first thing in order to fill it > > @@ -302,9 +307,13 @@ retry: > >* allocating a new buffer is probably faster than > >* waiting for the GPU to finish. > >*/ > > - bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head); > > - if (!brw_bo_busy(bo)) { > > -list_del(&bo->head); > > + LIST_FOR_EACH_ENTRY(temp_bo, &bucket->head, head) { > > +if (temp_bo->size >= size && > > +!brw_bo_busy(temp_bo)) { > > + bo = temp_bo; > > + list_del(&bo->head); > > + break; > > +} > > } > >} > > > > diff --git a/src/util/list.h b/src/util/list.h > > index 6edb750..9362072 100644 > > --- a/src/util/list.h > > +++ b/src/util/list.h > > @@ -189,6 +189,11 @@ static inline void list_validate(struct > > list_head *list) &pos->member != > > (head); \ pos = > > container_of(pos->member.next, pos, member)) > > +#define LIST_FOR_EACH_ENTRY_REV(pos, head, > > member) \ > > + for (pos = NULL, pos = container_of((head)->prev, pos, > > member); \ > > +&pos->member != > > (head); \ > > +pos = container_of(pos->member.prev, pos, member)) > > + > > #define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, > > member) \ for (pos = NULL, pos = container_of((head)->next, > > pos, member), \ storage = container_of(pos->member.next, > > pos, member); \ > > Hi James, > > I don't particularly like the idea of introducing linear linked list > walks to find a buffer. It's a really nice property to have every > buffer in the bucket be suitable and be able to grab one immediately. > It sounds like purging things more often in patch 4 reduces the cost > of this new list walk, but...it'd be nice to not add it in the first > place. Yes, the purge, as well as the cleanup routine which kicks in from time to time and removes any cached buffers freed 1+ second ago, keeps the list short. One thing I could think of is to have a fixed size cached buffer array so that we could use a binary search instead of walks of the double-linked list. > > This also conflicts with my new VMA allocator approach a lot, which > assumes that all buffers in a bucket are the same size... Yes. > > Still, you've shown some pretty nice memory savings. 
Scott and I were > wondering if it would be possible to achieve a similar effect by > tuning the bucket sizes - either adding more, or adjusting some. The > sizes we have are pretty arbitrary - we started with po
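A minimal sketch of the fixed-size-array idea mentioned above, assuming each bucket kept its cached BOs in an array sorted by size; this is illustrative only, not existing brw_bufmgr code:

   /* Return the index of the smallest cached BO that can satisfy the
    * request, or count if none is large enough. */
   static unsigned
   cached_bo_lower_bound(struct brw_bo **bos, unsigned count, uint64_t size)
   {
      unsigned lo = 0, hi = count;
      while (lo < hi) {
         unsigned mid = lo + (hi - lo) / 2;
         if (bos[mid]->size < size)
            lo = mid + 1;
         else
            hi = mid;
      }
      return lo;
   }

Lookup would then be O(log n) per bucket instead of a linear list walk, at the cost of keeping the array sorted on insertion.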
Re: [Mesa-dev] [PATCH 4/4] i965/drm: Purge the bucket when its cached buffer is evicted
On Sat, 5 May 2018 08:55:24 +0100 Chris Wilson wrote: > Quoting James Xiong (2018-05-05 01:56:05) > > From: "Xiong, James" > > > > When one of cached buffers is found to be evicted by kernel, > > most likely the buffers freed earlier than this buffer are > > gone too, go through the cached list in the bucket and purge. > > The order in this list bears little relevance to the MRU lists in the > kernel. (I wish it did.) So why? The list will be processed in due > course, so why pay the cost now? Even in the worst case, they will be > cleaned up as we are in the middle of processing the lists. > -Chris This is the existing logic. I think it's because the free time is the only hint in userland that indicates whether a buffer has been evicted or not; maybe we could associate a fence with each cached buffer in the future, or something like that, but at the moment I don't know how to improve it. I just followed the existing idea and made it work with the new implementation; the idea is still the same as before. The cleanup routine (cleanup_bo_cache?) is only triggered when a buffer's refcount goes down to 0; my guess is that it's not frequent enough, so we need the purge to keep the cached list short. Anyway, if we do need to improve this part, we could do it in another patch. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/4] improve buffer cache and reuse
On Sat, 5 May 2018 09:11:28 +0100 Chris Wilson wrote: > Quoting James Xiong (2018-05-05 01:56:01) > > This series align the buffer size up to page instead of a bucket > > size to improve memory allocation efficiency. > > It doesn't though. It still retrieves up to the bucket size, so with a > little cache poisoning (or a series of unfortunate events) it will be > no better than before. > > Perhaps open with the problem statement. What is it you are trying to > fix? Would adding metrics to the buffer cache be a good start to > demonstrating what needs improving? > -Chris In the worst case, it is the same as before; the best case however reduces the allocated size by about 25% of the requested size. In the real world, the best case was the gl_5_off test, where the patch saved 102749K (100+ MB) out of 1143593K; the worst case saved only 64K out of 12730K. The current implementation allocates 0% to 25% more memory than the requested size, with or without reuse enabled. I am trying to reduce the memory penalty. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
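To make the worst case concrete: the bucket sizes step in quarters of each power-of-two range (..., 8 MB, 10 MB, 12 MB, 14 MB, 16 MB, ...), so a request of 8 MB + 1 byte is rounded up to the 10 MB bucket, an overhead of (10 - 8) / 8 = 25% of the requested size, while a request that already matches a bucket size has 0% overhead. With this series a fresh allocation is only padded up to the next 4 KiB page, although, as Chris notes above, reuse can still hand back a cached BO that is larger than the request.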
[Mesa-dev] [PATCH 4/4] i965/drm: Purge the bucket when its cached buffer is evicted
From: "Xiong, James" When one of the cached buffers is found to have been evicted by the kernel, the buffers freed earlier than it are most likely gone too, so go through the bucket's cached list and purge them. Signed-off-by: Xiong, James --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 5235aa6..9f2e566 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -248,17 +248,20 @@ brw_bo_madvise(struct brw_bo *bo, int state) return madv.retained; } -/* drop the oldest entries that have been purged by the kernel */ +/* drop the entries that are older than the given time */ static void brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr, - struct bo_cache_bucket *bucket) + struct bo_cache_bucket *bucket, + time_t time) { list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) { - if (brw_bo_madvise(bo, I915_MADV_DONTNEED)) + if (bo->free_time >= time) { + brw_bo_madvise(bo, I915_MADV_DONTNEED); + list_del(&bo->head); + bo_free(bo); + } else { break; - - list_del(&bo->head); - bo_free(bo); + } } } @@ -319,8 +322,8 @@ retry: if (bo) { if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) { +brw_bo_cache_purge_bucket(bufmgr, bucket, bo->free_time); bo_free(bo); -brw_bo_cache_purge_bucket(bufmgr, bucket); return NULL; } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/4] improve buffer cache and reuse
From: "Xiong, James" With the current implementation, brw_bufmgr may round up a request size to the next bucket size, result in 25% more memory allocated in the worst senario. For example: Request sizeActual size 32KB+1Byte 40KB . 8MB+1Byte 10MB . 96MB+1Byte 112MB This series align the buffer size up to page instead of a bucket size to improve memory allocation efficiency. Performance and memory usage were measured on a gen9 platform using Basemark ES3, GfxBench 4 and 5, each test case ran 6 times. Basemark ES3 scorepeak memory size(KB) beforeafter diff before after diff max avg max avg maxavg 22 2123 21 2.83% 1.21% 409928 395573 -14355 20 2020 20 0.53% 0.41% GfxBench 4.0 scorepeak memory size(KB) score peak memory size(KB) before after diffbefore after diff max avg max avg max avg 584 577 586 583 0.45% 1.02% 566489 539699 -26791 728 727 727 726 -0.03% -0.16% 614730 586794 -27936 1604 1144 1650 1202 2.81% 4.86% 439220 411596 -27624 2711 2718 2152 0.25% -3.25% 126065 121398 -4667 1218 1213 1212 1154 -0.53% -5.10% 54153 53868 -285 106 104 106 103 0.85% -1.66% 12730 12666 -64 1732 1709 1740 1728 0.49% 1.11% 475716 447726 -27990 3051 2969 3066 3047 0.50% 2.55% 154169 148962 -5207 2626 2607 2626 2625 0.00% 0.70% 84119 83150 -969 211 208 208 205 -1.26% -1.21% 39924 39667 -257 GfxBench 5.0 score peak memory size(KB) beforeafter diffbefore afterdiff max avg max avg max avg 260 258 259 256 -0.39% -0.85% 037 1013520 -97517 298 295 298 297 0.00% 0.45% 1143593 1040844 -102749 Xiong, James (4): i965/drm: Reorganize code for the next patch i965/drm: Round down buffer size and calculate the bucket index i965/drm: Searching for a cached buffer for reuse i965/drm: Purge the bucket when its cached buffer is evicted src/mesa/drivers/dri/i965/brw_bufmgr.c | 139 ++--- src/util/list.h| 5 ++ 2 files changed, 79 insertions(+), 65 deletions(-) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/4] i965/drm: Round down buffer size and calculate the bucket index
From: "Xiong, James" a buffer is now put in cached bucket #n when its size is between bucket[n].size and bucket[n+1].size - 1 Signed-off-by: Xiong, James --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index e68da26..6a9b005 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -189,8 +189,8 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling) static struct bo_cache_bucket * bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { - /* Calculating the pages and rounding up to the page size. */ - const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + /* Calculating the pages and rounding down to the page size. */ + const unsigned pages = (size < PAGE_SIZE) ? 1 : size / PAGE_SIZE; /* Row Bucket sizesclz((x-1) | 3) RowColumn *in pages stride size @@ -211,8 +211,7 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) int col_size_log2 = row - 1; col_size_log2 += (col_size_log2 < 0); - const unsigned col = (pages - prev_row_max_pages + -((1 << col_size_log2) - 1)) >> col_size_log2; + const unsigned col = (pages - prev_row_max_pages) >> col_size_log2; /* Calculating the index based on the row and column. */ const unsigned index = (row * 4) + (col - 1); @@ -1285,9 +1284,9 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) bufmgr->cache_bucket[i].size = size; bufmgr->num_buckets++; + assert(bucket_for_size(bufmgr, size - 1) == &bufmgr->cache_bucket[i==0?0:i-1]); assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]); - assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]); - assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]); + assert(bucket_for_size(bufmgr, size + 1) == &bufmgr->cache_bucket[i]); } static void -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
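An illustrative consequence of the new mapping, assuming the usual bucket progression with a 4 KiB page size (..., 32 KB, 40 KB, 48 KB, ...): any size in [bucket[n].size, bucket[n+1].size) now selects bucket n, which is exactly what the reworked asserts in add_bucket() verify, e.g.

   assert(bucket_for_size(bufmgr, 32 * 1024) ==
          bucket_for_size(bufmgr, 40 * 1024 - 1));   /* both land in the 32 KB bucket */
   assert(bucket_for_size(bufmgr, 40 * 1024) !=
          bucket_for_size(bufmgr, 32 * 1024));       /* 40 KB starts the next bucket */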
[Mesa-dev] [PATCH 1/4] i965/drm: Reorganize code for the next patch
From: "Xiong, James" split bo_alloc_internal, and add a new function cached_bo_for_size searches for a suitable cached buffer for a given size. Signed-off-by: Xiong, James --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 92 +- 1 file changed, 45 insertions(+), 47 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 7cb1f03..e68da26 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -263,53 +263,29 @@ brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr, } } +/* search for a suitable cached bo for reuse */ static struct brw_bo * -bo_alloc_internal(struct brw_bufmgr *bufmgr, - const char *name, - uint64_t size, - unsigned flags, - uint32_t tiling_mode, - uint32_t stride) +cached_bo_for_size(struct brw_bufmgr *bufmgr, + uint64_t size, + uint32_t tiling_mode, + uint32_t stride, + unsigned flags) { - struct brw_bo *bo; - unsigned int page_size = getpagesize(); - int ret; - struct bo_cache_bucket *bucket; - bool alloc_from_cache; - uint64_t bo_size; - bool busy = false; - bool zeroed = false; - - if (flags & BO_ALLOC_BUSY) - busy = true; - - if (flags & BO_ALLOC_ZEROED) - zeroed = true; + bool busy = (flags & BO_ALLOC_BUSY) ? true : false; + bool zeroed = (flags & BO_ALLOC_ZEROED) ? true : false; + struct bo_cache_bucket *bucket = + (bufmgr->bo_reuse) ? bucket_for_size(bufmgr, size) : NULL; /* BUSY does doesn't really jive with ZEROED as we have to wait for it to * be idle before we can memset. Just disallow that combination. */ assert(!(busy && zeroed)); - /* Round the allocated size up to a power of two number of pages. */ - bucket = bucket_for_size(bufmgr, size); - - /* If we don't have caching at this size, don't actually round the -* allocation up. 
-*/ - if (bucket == NULL) { - bo_size = size; - if (bo_size < page_size) - bo_size = page_size; - } else { - bo_size = bucket->size; - } - - mtx_lock(&bufmgr->lock); - /* Get a buffer out of the cache if available */ + if(bucket != NULL && !list_empty(&bucket->head)) { + struct brw_bo *bo; retry: - alloc_from_cache = false; - if (bucket != NULL && !list_empty(&bucket->head)) { + bo = NULL; + if (busy && !zeroed) { /* Allocate new render-target BOs from the tail (MRU) * of the list, as it will likely be hot in the GPU @@ -319,7 +295,6 @@ retry: */ bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head); list_del(&bo->head); - alloc_from_cache = true; } else { /* For non-render-target BOs (where we're probably * going to map it first thing in order to fill it @@ -330,16 +305,15 @@ retry: */ bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head); if (!brw_bo_busy(bo)) { -alloc_from_cache = true; list_del(&bo->head); } } - if (alloc_from_cache) { + if (bo) { if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) { bo_free(bo); brw_bo_cache_purge_bucket(bufmgr, bucket); -goto retry; +return NULL; } if (bo_set_tiling_internal(bo, tiling_mode, stride)) { @@ -353,20 +327,44 @@ retry: bo_free(bo); goto retry; } -memset(map, 0, bo_size); +memset(map, 0, bo->size); } } + + return bo; } - if (!alloc_from_cache) { + return NULL; +} + +static struct brw_bo * +bo_alloc_internal(struct brw_bufmgr *bufmgr, + const char *name, + uint64_t size, + unsigned flags, + uint32_t tiling_mode, + uint32_t stride) +{ + struct brw_bo *bo; + int ret; + + /* align the request size to page size */ + size = ALIGN(size, getpagesize()); + + mtx_lock(&bufmgr->lock); + + /* Get a buffer out of the cache if available */ + bo = cached_bo_for_size(bufmgr, size, tiling_mode, stride, flags); + + if (bo == NULL) { bo = calloc(1, sizeof(*bo)); if (!bo) goto err; - bo->size = bo_size; + bo->size = size; bo->idle = true; - struct drm_i915_gem_create create = { .size = bo_size }; + struct drm_i915_gem_create create = { .size = size }; /* All new BOs we get from the kernel are zeroed, so we don't need to * worry about that here. @@ -413,7 +411,7 @@ retry: mtx_unlock(&bufmgr->lock); DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name, - (unsigned long long) size); + (unsigned long long) bo->size); return bo; -- 2.7.4 ___ mesa-dev mailing list m
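For reviewers, the allocation path after this split can be summarized as follows; this is a paraphrase of the patch, not additional code:

   size = ALIGN(size, getpagesize());
   mtx_lock(&bufmgr->lock);
   bo = cached_bo_for_size(bufmgr, size, tiling_mode, stride, flags);
   if (bo == NULL) {
      /* No reusable cached BO: create a fresh GEM object of exactly the
       * page-aligned size via DRM_IOCTL_I915_GEM_CREATE. */
   }

Note that bo_size disappears: the BO is created with the page-aligned request size rather than a bucket size, which is where the memory savings in the cover letter come from.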
[Mesa-dev] [PATCH 3/4] i965/drm: Searching for a cached buffer for reuse
From: "Xiong, James" Now that a bucket contains cached buffers with different sizes, go through its list and search for a cached buffer with enough size. Signed-off-by: Xiong, James --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 21 +++-- src/util/list.h| 5 + 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 6a9b005..5235aa6 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -281,7 +281,7 @@ cached_bo_for_size(struct brw_bufmgr *bufmgr, assert(!(busy && zeroed)); if(bucket != NULL && !list_empty(&bucket->head)) { - struct brw_bo *bo; + struct brw_bo *bo, *temp_bo; retry: bo = NULL; @@ -292,8 +292,13 @@ retry: * asked us to zero the buffer, we don't want this * because we are going to mmap it. */ - bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head); - list_del(&bo->head); + LIST_FOR_EACH_ENTRY_REV(temp_bo, &bucket->head, head) { +if (temp_bo->size >= size) { + bo = temp_bo; + list_del(&bo->head); + break; +} + } } else { /* For non-render-target BOs (where we're probably * going to map it first thing in order to fill it @@ -302,9 +307,13 @@ retry: * allocating a new buffer is probably faster than * waiting for the GPU to finish. */ - bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head); - if (!brw_bo_busy(bo)) { -list_del(&bo->head); + LIST_FOR_EACH_ENTRY(temp_bo, &bucket->head, head) { +if (temp_bo->size >= size && +!brw_bo_busy(temp_bo)) { + bo = temp_bo; + list_del(&bo->head); + break; +} } } diff --git a/src/util/list.h b/src/util/list.h index 6edb750..9362072 100644 --- a/src/util/list.h +++ b/src/util/list.h @@ -189,6 +189,11 @@ static inline void list_validate(struct list_head *list) &pos->member != (head); \ pos = container_of(pos->member.next, pos, member)) +#define LIST_FOR_EACH_ENTRY_REV(pos, head, member) \ + for (pos = NULL, pos = container_of((head)->prev, pos, member); \ +&pos->member != (head); \ +pos = container_of(pos->member.prev, pos, member)) + #define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \ for (pos = NULL, pos = container_of((head)->next, pos, member), \ storage = container_of(pos->member.next, pos, member); \ -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
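The new LIST_FOR_EACH_ENTRY_REV macro walks a list from tail to head; a small usage sketch in the style of the patch (hypothetical caller code):

   struct brw_bo *found = NULL, *cursor;
   /* Walk from the tail (most recently freed, likely still hot in the GPU
    * caches) back towards the head until a BO is big enough. */
   LIST_FOR_EACH_ENTRY_REV(cursor, &bucket->head, head) {
      if (cursor->size >= size) {
         found = cursor;
         list_del(&cursor->head);
         break;
      }
   }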
Re: [Mesa-dev] [PATCH 0/4] improve buffer cache and reuse
On Wed, 2 May 2018 14:18:21 +0300 Eero Tamminen wrote: > Hi, > > On 02.05.2018 02:25, James Xiong wrote: > > From: "Xiong, James" > > > > With the current implementation, brw_bufmgr may round up a request > > size to the next bucket size, result in 25% more memory allocated in > > the worst senario. For example: > > Request sizeActual size > > 32KB+1Byte 40KB > > . > > 8MB+1Byte 10MB > > . > > 96MB+1Byte 112MB > > This series align the buffer size up to page instead of a bucket > > size to improve memory allocation efficiency. Performances are > > almost the same with Basemark ES3, GfxBench4 and 5: > > > > Basemark ES3 > > scorepeak memory allocation > >before afterdiffbeforeafter diff > > 21.537462 21.888784 1.61%419766272 408809472 -10956800 > > 19.566198 19.763429 1.00% > > What memory you're measuring: > > * VmSize (not that relevant unless you're running out of address > space)? > > * PrivateDirty (listed in /proc/PID/smaps and e.g. by "smem" tool > [1])? > > * total of allocation sizes used by Mesa? > > Or something else? > > In general, unused memory isn't much of a problem, only dirty > (written) memory. Kernel maps all unused memory to a single zero > page, so unused memory takes only few bytes of RAM for the page table > entries (required for tracking the allocation pages). I did the measurements in brw_bufmgr from the user space, I kept tracks of the allocated size for each brw_bufmgr context, and printed out the peak allocated size when the test completed and context was destroyed. basically I increased/decreased the size when I915_GEM_CREATE or GEM_CLOSE were called, so the cached buffers, imported or user_ptr buffers were excluded. The brw_bufmgr context is created when the test starts and destroyed after it completes, the size is for the test case in bytes. This method can measure exact size allocated for a given test case and the result is precise too. > > > > GfxBench 4.0 > > score > > peak memory before after diff before > > after diff gl_4 564.6052246094 565.2348632813 > > 0.11% 578490368 550199296 -28291072 gl_4_off > > 727.0440063477 703.5833129883 -3.33% 629501952 > > 598216704 -31285248 gl_manhattan 1053.4223632813 > > 1057.3690185547 0.37% 449568768 421134336 -28434432 > > gl_trex 2708.0656738281 2699.2646484375 -0.33% > > 130076672 125042688 -5033984 gl_alu2 1207.1490478516 > > 1212.2220458984 0.42% 55496704 55029760 -466944 > > gl_driver2 103.0383071899 103.5478439331 0.49% > > 13107200 12980224 -126976 gl_manhattan_off 1703.4780273438 > > 1736.9074707031 1.92% 490016768 456548352 -33468416 > > gl_trex_off 2951.6809082031 3058.5422363281 3.49% > > 157511680 152260608 -5251072 gl_alu2_off 2604.0903320313 > > 2626.2524414063 0.84% 86130688 85483520 -647168 > > gl_driver2_off 204.0173187256 207.0510101318 1.47% > > 40869888 40615936 -253952 > > You're missing information on: > * On which plaform you did the testing (affects variance) > * how many test rounds you ran, and > * what is your variance I ran these tests on a gen9 platform/ubuntu 17.10 LTS. Most of the tests are consistent, especially the memory usage. The only exception is GfxBench 4.0 gl_manhattan, I had to ran it 3 times and pick the highest one. I will apply this method to all tests and re-send with updated results. > > -> I don't know whether your numbers are just random noise. > > > Memory is allocated in pages from kernel, so there's no point in > showing its usage as bytes. Please use KBs, that's more readable. > > (Because of randomness e.g. 
interactions with the windowing system, > there can be some variance also in process memory usage, which may > also be useful to report.) > > Because of variance, you don't need that decimals for the scores. > Removing the extra ones makes that data a bit more readable too. > > > - Eero > > [1] "smem" is python based tool available at least in Debian. > If you want something simpler, e.g. shell script working with > minimal shells like Busybox, you can use this: > https://github.com/maemo-tools-old/sp-memusage/blob/master/scripts/mem-smaps-private > > > > GfxBench 5.0 > > score peak memory > > beforeafter b
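A rough sketch of the accounting James describes: every I915_GEM_CREATE adds to a per-bufmgr counter, every GEM_CLOSE subtracts from it, and the high-water mark is reported when the bufmgr is destroyed. The field names here are hypothetical, added only for measurement:

   static void
   account_gem_create(struct brw_bufmgr *bufmgr, uint64_t size)
   {
      bufmgr->allocated_bytes += size;
      if (bufmgr->allocated_bytes > bufmgr->peak_allocated_bytes)
         bufmgr->peak_allocated_bytes = bufmgr->allocated_bytes;
   }

   static void
   account_gem_close(struct brw_bufmgr *bufmgr, uint64_t size)
   {
      bufmgr->allocated_bytes -= size;
   }

Imported and userptr buffers are excluded, so the counter reflects only the driver's own GEM allocations.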
[Mesa-dev] [PATCH 0/4] improve buffer cache and reuse
From: "Xiong, James" With the current implementation, brw_bufmgr may round up a request size to the next bucket size, result in 25% more memory allocated in the worst senario. For example: Request sizeActual size 32KB+1Byte 40KB . 8MB+1Byte 10MB . 96MB+1Byte 112MB This series align the buffer size up to page instead of a bucket size to improve memory allocation efficiency. Performances are almost the same with Basemark ES3, GfxBench4 and 5: Basemark ES3 scorepeak memory allocation before afterdiffbeforeafter diff 21.537462 21.888784 1.61%419766272 408809472 -10956800 19.566198 19.763429 1.00% GfxBench 4.0 scorepeak memory before after diff before after diff gl_4 564.6052246094 565.2348632813 0.11% 578490368 550199296 -28291072 gl_4_off 727.0440063477 703.5833129883 -3.33% 629501952 598216704 -31285248 gl_manhattan 1053.4223632813 1057.3690185547 0.37% 449568768 421134336 -28434432 gl_trex 2708.0656738281 2699.2646484375 -0.33% 130076672 125042688 -5033984 gl_alu2 1207.1490478516 1212.2220458984 0.42% 55496704 55029760 -466944 gl_driver2 103.0383071899 103.5478439331 0.49% 13107200 12980224 -126976 gl_manhattan_off 1703.4780273438 1736.9074707031 1.92% 490016768 456548352 -33468416 gl_trex_off 2951.6809082031 3058.5422363281 3.49% 157511680 152260608 -5251072 gl_alu2_off 2604.0903320313 2626.2524414063 0.84% 86130688 85483520 -647168 gl_driver2_off 204.0173187256 207.0510101318 1.47% 40869888 40615936 -253952 GfxBench 5.0 score peak memory before after before after diff gl_5 259 259 1137549312 1038286848 -99262464 gl_5_off 297 297 1170853888 1071357952 -99495936 Xiong, James (4): i965/drm: Reorganize code for the next patch i965/drm: Round down buffer size and calculate the bucket index i965/drm: Searching for a cached buffer for reuse i965/drm: Purge the bucket when its cached buffer is evicted src/mesa/drivers/dri/i965/brw_bufmgr.c | 139 ++--- src/util/list.h| 5 ++ 2 files changed, 79 insertions(+), 65 deletions(-) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/1] main: fail texture_storage() call if the size is not okay
From: "Xiong, James" Signed-off-by: Xiong, James --- src/mesa/main/texstorage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c index 44edba3..9cb8b90 100644 --- a/src/mesa/main/texstorage.c +++ b/src/mesa/main/texstorage.c @@ -445,6 +445,7 @@ texture_storage(struct gl_context *ctx, GLuint dims, _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTex%sStorage%uD(texture too large)", suffix, dims); +return; } } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
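A hypothetical caller-side illustration, not part of the patch; whether this particular path reports GL_OUT_OF_MEMORY depends on the driver and on how much memory is available:

   GLuint tex;
   glGenTextures(1, &tex);
   glBindTexture(GL_TEXTURE_2D, tex);
   /* a request large enough that the driver cannot allocate it */
   glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA16F, 16384, 16384);
   if (glGetError() == GL_OUT_OF_MEMORY) {
      /* with the added return, texture_storage() bails out here instead
       * of continuing to set up the texture with unusable storage */
   }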
Re: [Mesa-dev] [PATCH 1/1] main: return 0 length when the queried program object's not linked
On Fri, 27 Apr 2018 12:32:09 +1000 Timothy Arceri wrote: > Reviewed-by: Timothy Arceri > > It would also be nice to have a piglit test for this case. I assume > mesa probably crashes without this patch? Thanks for the review. Yes there was a segmentation fault. > > On 27/04/18 11:39, James Xiong wrote: > > From: "Xiong, James" > > > > Signed-off-by: Xiong, James > > --- > > src/mesa/main/shaderapi.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c > > index 44b18af..caa4254 100644 > > --- a/src/mesa/main/shaderapi.c > > +++ b/src/mesa/main/shaderapi.c > > @@ -837,7 +837,7 @@ get_programiv(struct gl_context *ctx, GLuint > > program, GLenum pname, *params = shProg->BinaryRetreivableHint; > > return; > > case GL_PROGRAM_BINARY_LENGTH: > > - if (ctx->Const.NumProgramBinaryFormats == 0) { > > + if (ctx->Const.NumProgramBinaryFormats == 0 > > || !shProg->data->LinkStatus) { *params = 0; > > } else { > >_mesa_get_program_binary_length(ctx, shProg, params); > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/1] main: return 0 length when the queried program object's not linked
From: "Xiong, James" Signed-off-by: Xiong, James --- src/mesa/main/shaderapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 44b18af..caa4254 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -837,7 +837,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, *params = shProg->BinaryRetreivableHint; return; case GL_PROGRAM_BINARY_LENGTH: - if (ctx->Const.NumProgramBinaryFormats == 0) { + if (ctx->Const.NumProgramBinaryFormats == 0 || !shProg->data->LinkStatus) { *params = 0; } else { _mesa_get_program_binary_length(ctx, shProg, params); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
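An illustrative usage of the case this guards against (not part of the patch), assuming a context that supports program binaries: querying the binary length of a program that was never successfully linked should now report 0 instead of dereferencing unlinked program data:

   GLuint prog = glCreateProgram();   /* created but never linked */
   GLint length = -1;
   glGetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &length);
   /* expected: length == 0, and no crash */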
Re: [Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space
On Fri, 13 Apr 2018 14:33:09 -0700 Kenneth Graunke wrote: > On Friday, April 13, 2018 2:08:40 PM PDT James Xiong wrote: > > On Fri, 13 Apr 2018 13:51:02 -0700 > > Kenneth Graunke wrote: > > > > > On Friday, April 13, 2018 1:35:45 PM PDT Kenneth Graunke wrote: > > > > On Monday, April 9, 2018 4:06:16 PM PDT James Xiong wrote: > > > > > From: "Xiong, James" > > > > > > > > > > On non-LLC platforms, we malloc shadow batch/state buffers > > > > > of the same sizes as our batch/state buffers' GEM allocations. > > > > > However the buffer allocator reuses similar-sized gem objects, > > > > > it returns a buffer larger than we asked for in some cases > > > > > and we end up with smaller shadow buffers. If we utilize the > > > > > full-size of the over-allocated batch/state buffers, we may > > > > > wind up accessing beyond the bounds of the shadow buffers and > > > > > cause segmentation fault and/or memory corruption. > > > > > > > > Oh, good catch! We do indeed malloc too little if the bufmgr > > Thank you for taking time to review, Kenneth. > > > > > > > > > A few examples: > > > > > casebatch state > > > > > request bo shadow request bo shadow > > > > > init020K 20K 20K16K 16K 16K > > > > > grow_buffer 130K 32K 30K24K 24K 24K > > > > > grow_buffer 248K 48K 48K36K 40K 36K > > > > > grow_buffer 372K 80K 72K60K 64K 60K > > > > > grow_buffer 4120K128K 120K - - - > > > > > > > > > > batch #1, #3, #4; state #2 and #3 are problematic. We can > > > > > change the order to allocate the bo first, then allocate the > > > > > shadow buffer using the bo's size so that the shadow buffer > > > > > have at least an equivalent size of the gem allocation. > > > > > > > > > > Another problem: even though the state/batch buffer could > > > > > grow, when checking if it runs out space, we always compare > > > > > with the initial batch/state sizes. To utilize the entire > > > > > buffers, change to compare with the actual sizes. > > > > > > > > This is actually intentional. Our goal is to flush batches > > > > when the amount of commands or state reaches those thresholds. > > > > Occasionally, we'll be in the middle of emitting a draw, and > > > > unable to stop. In that case, we grow the batch and keep > > > > going. But after that, we're beyond our original target, so we > > > > flush next time. We don't want to grow without bounds...it's > > > > meant more for emergencies, or if we've badly estimated the > > > > size of the draw call. > > I am not sure I get it. Let me give an example: the state buffer > > gets grown once from 16K to 24K in brw_state_batch(), the used_size > > becomes 20K, then brw_require_statebuffer_space(1024) gets called to > > ask for 1K space, with the original logical, it compares the used > > size with 16K and flush the batch even though the state buffer > > still has 4K space available? > > Yes, the idea is to flush at around 16K of state. If we happen to be > in the middle of a draw and run out of space, we'll grow to 24K. > Once it's over 16K, we flush as soon as we can. > > We'd like to be fairly consistent on our batch size. Running larger > batches can lead to differences in performance, and large batches can > lead to degradation in the interactivity of the system (especially on > GPUs without preemption). > > The hope is to grow once, at most. If we check against the BO size, > we might grow repeatedly, which would lead to really large batches and > things would get out of hand. I see, thanks for the explanation, Kenneth. 
> > > > > > > > > I've sent a simpler patch which I think should hopefully fix > > > > your bug: https://patchwork.freedesktop.org/patch/217107/ > > > > > > Lionel noticed that I botched that patch. Here's an actual one: > > > > > > https://patchwork.freedesktop.org/patch/217108/ > > Yes it will fix the existing bug. However the assumption here is > > that the init allocation size will NOT be rounded up as it happens > > to be the bucket size. > > I am working on an optimization to improve memory usage(that's how > > I find out this bug), this assumption is no longer true. > > Essentially the bufmgr could return a buffer with the same or > > larger size whether it is same as the bucket's or not. Anyway I > > guess I can send the fix later along with the optimization > > patches. > > Ah, that's a good point. Your patch also tries to use the BO size > for the initial malloc as well, which is a good idea... So what do you want? you want me to change this patch and sent for review or take yours for now. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space
On Fri, 13 Apr 2018 13:51:02 -0700 Kenneth Graunke wrote: > On Friday, April 13, 2018 1:35:45 PM PDT Kenneth Graunke wrote: > > On Monday, April 9, 2018 4:06:16 PM PDT James Xiong wrote: > > > From: "Xiong, James" > > > > > > On non-LLC platforms, we malloc shadow batch/state buffers > > > of the same sizes as our batch/state buffers' GEM allocations. > > > However the buffer allocator reuses similar-sized gem objects, > > > it returns a buffer larger than we asked for in some cases > > > and we end up with smaller shadow buffers. If we utilize the > > > full-size of the over-allocated batch/state buffers, we may wind > > > up accessing beyond the bounds of the shadow buffers and cause > > > segmentation fault and/or memory corruption. > > > > Oh, good catch! We do indeed malloc too little if the bufmgr Thank you for taking time to review, Kenneth. > > > > > A few examples: > > > casebatch state > > > request bo shadow request bo shadow > > > init020K 20K 20K16K 16K 16K > > > grow_buffer 130K 32K 30K24K 24K 24K > > > grow_buffer 248K 48K 48K36K 40K 36K > > > grow_buffer 372K 80K 72K60K 64K 60K > > > grow_buffer 4120K128K 120K - - - > > > > > > batch #1, #3, #4; state #2 and #3 are problematic. We can change > > > the order to allocate the bo first, then allocate the shadow > > > buffer using the bo's size so that the shadow buffer have at > > > least an equivalent size of the gem allocation. > > > > > > Another problem: even though the state/batch buffer could grow, > > > when checking if it runs out space, we always compare with the > > > initial batch/state sizes. To utilize the entire buffers, change > > > to compare with the actual sizes. > > > > This is actually intentional. Our goal is to flush batches when the > > amount of commands or state reaches those thresholds. Occasionally, > > we'll be in the middle of emitting a draw, and unable to stop. In > > that case, we grow the batch and keep going. But after that, we're > > beyond our original target, so we flush next time. We don't want > > to grow without bounds...it's meant more for emergencies, or if > > we've badly estimated the size of the draw call. I am not sure I get it. Let me give an example: the state buffer gets grown once from 16K to 24K in brw_state_batch(), the used_size becomes 20K, then brw_require_statebuffer_space(1024) gets called to ask for 1K space, with the original logical, it compares the used size with 16K and flush the batch even though the state buffer still has 4K space available? > > > > I've sent a simpler patch which I think should hopefully fix your > > bug: https://patchwork.freedesktop.org/patch/217107/ > > Lionel noticed that I botched that patch. Here's an actual one: > > https://patchwork.freedesktop.org/patch/217108/ Yes it will fix the existing bug. However the assumption here is that the init allocation size will NOT be rounded up as it happens to be the bucket size. I am working on an optimization to improve memory usage(that's how I find out this bug), this assumption is no longer true. Essentially the bufmgr could return a buffer with the same or larger size whether it is same as the bucket's or not. Anyway I guess I can send the fix later along with the optimization patches. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space
From: "Xiong, James" On non-LLC platforms, we malloc shadow batch/state buffers of the same sizes as our batch/state buffers' GEM allocations. However the buffer allocator reuses similar-sized gem objects, it returns a buffer larger than we asked for in some cases and we end up with smaller shadow buffers. If we utilize the full-size of the over-allocated batch/state buffers, we may wind up accessing beyond the bounds of the shadow buffers and cause segmentation fault and/or memory corruption. A few examples: casebatch state request bo shadow request bo shadow init020K 20K 20K16K 16K 16K grow_buffer 130K 32K 30K24K 24K 24K grow_buffer 248K 48K 48K36K 40K 36K grow_buffer 372K 80K 72K60K 64K 60K grow_buffer 4120K128K 120K - - - batch #1, #3, #4; state #2 and #3 are problematic. We can change the order to allocate the bo first, then allocate the shadow buffer using the bo's size so that the shadow buffer have at least an equivalent size of the gem allocation. Another problem: even though the state/batch buffer could grow, when checking if it runs out space, we always compare with the initial batch/state sizes. To utilize the entire buffers, change to compare with the actual sizes. Cc: mesa-sta...@lists.freedesktop.org Signed-off-by: Xiong, James --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/intel_batchbuffer.c | 49 +-- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index f049d08..39aae08 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -477,6 +477,7 @@ struct brw_growing_bo { struct brw_bo *partial_bo; uint32_t *partial_bo_map; unsigned partial_bytes; + unsigned shadow_size; }; struct intel_batchbuffer { diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 7286140..facbbf8 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -107,12 +107,6 @@ intel_batchbuffer_init(struct brw_context *brw) batch->use_shadow_copy = !devinfo->has_llc; - if (batch->use_shadow_copy) { - batch->batch.map = malloc(BATCH_SZ); - batch->map_next = batch->batch.map; - batch->state.map = malloc(STATE_SZ); - } - init_reloc_list(&batch->batch_relocs, 250); init_reloc_list(&batch->state_relocs, 250); @@ -212,10 +206,25 @@ intel_batchbuffer_reset(struct brw_context *brw) batch->last_bo = batch->batch.bo; recreate_growing_buffer(brw, &batch->batch, "batchbuffer", BATCH_SZ); - batch->map_next = batch->batch.map; recreate_growing_buffer(brw, &batch->state, "statebuffer", STATE_SZ); + if (batch->use_shadow_copy) { + if (batch->batch.shadow_size < batch->batch.bo->size) { + free(batch->batch.map); + batch->batch.map = malloc(batch->batch.bo->size); + batch->batch.shadow_size = batch->batch.bo->size; + } + + if (batch->state.shadow_size < batch->state.bo->size) { + free(batch->state.map); + batch->state.map = malloc(batch->state.bo->size); + batch->state.shadow_size = batch->state.bo->size; + } + } + + batch->map_next = batch->batch.map; + /* Avoid making 0 a valid state offset - otherwise the decoder will try * and decode data when we use offset 0 as a null pointer. */ @@ -361,7 +370,8 @@ grow_buffer(struct brw_context *brw, * breaking existing pointers the caller may still be using. Just * malloc a new copy and memcpy it like the normal BO path. 
*/ - grow->map = malloc(new_size); + grow->map = malloc(new_bo->size); + grow->shadow_size = new_bo->size; } else { grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE); } @@ -467,15 +477,17 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, } const unsigned batch_used = USED_BATCH(*batch) * 4; - if (batch_used + sz >= BATCH_SZ && !batch->no_wrap) { - intel_batchbuffer_flush(brw); - } else if (batch_used + sz >= batch->batch.bo->size) { - const unsigned new_size = - MIN2(batch->batch.bo->size + batch->batch.bo->size / 2, - MAX_BATCH_SIZE); - grow_buffer(brw, &batch->batch, batch_used, new_size); - batch->map_next = (void *) batch->batch.map + batch_used; - assert(batch_used + sz < batch->batch.bo->size); + if (batch_used + sz >= batch->batch.bo->size) { + if (!batch->no_wrap) { + intel_batchbuffer_flush(brw); + } else { + const unsigned new_size = +MIN2(batch->batch.bo->size + batch->batch.bo->size / 2, + MAX_BATCH_SIZE); + grow_buff
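[Editorial note] For readers following the intel_batchbuffer.c changes above, here is a simplified, self-contained sketch of the ordering the commit message describes: allocate the GEM buffer first, then size the malloc'd shadow copy from whatever size the allocator actually handed back. round_up_to_bucket() is a hypothetical stand-in for the real bufmgr bucketing, and the 36K request mirrors the "grow_buffer 2" state case in the table; it is an illustration, not the actual patch.

/* Allocate the GEM buffer first, then size the CPU shadow copy from the
 * size the allocator actually returned, so the shadow is never smaller
 * than the BO even when the bufmgr over-allocates. */
#include <stdio.h>
#include <stdlib.h>

static unsigned
round_up_to_bucket(unsigned request)
{
   return (request + 8191u) & ~8191u;   /* hypothetical 8 KiB buckets */
}

struct growing_buf {
   unsigned bo_size;       /* size of the GEM allocation */
   void    *shadow;        /* CPU shadow copy on non-LLC platforms */
   unsigned shadow_size;   /* always >= bo_size with this ordering */
};

static void
alloc_buffer(struct growing_buf *buf, unsigned request)
{
   buf->bo_size = round_up_to_bucket(request);  /* bufmgr may over-allocate */
   buf->shadow_size = buf->bo_size;             /* shadow sized from the BO, */
   buf->shadow = malloc(buf->shadow_size);      /* never from the request    */
}

int
main(void)
{
   struct growing_buf state = {0};

   alloc_buffer(&state, 36 * 1024);
   printf("requested 36K, bo %uK, shadow %uK\n",
          state.bo_size / 1024, state.shadow_size / 1024);
   free(state.shadow);
   return 0;
}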
[Mesa-dev] [PATCH 1/1] i965: return the fourcc saved in __DRIimage when possible
From: "Xiong, James" When creating a image from a texture, the image's dri_format is set to the first plane's format, and used to look up for the fourcc. e.g. for FOURCC_NV12 texture, the dri_format is set to __DRI_IMAGE_FORMAT_R8, we end up with a wrong entry in function intel_lookup_fourcc(): { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_COMPONENTS_R, 1, { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } }, instead of the corret one: { __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2, { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } }, as a result, a wrong fourcc __DRI_IMAGE_FOURCC_R8 was returned. To fix this bug, the image inherits the texture's planar_format that has the original fourcc; Upon querying, if planar_format is set, return the saved fourcc; Otherwise fall back to the old way. v3: add a bug description and "cc mesa-stable" tag (Jason) remove abandunt null pointer check (Tapani) squash 2 patches into one (James) v2: fall back to intel_lookup_fourcc() when planar_format is NULL (Dongwon & Matt Roper) Cc: mesa-sta...@lists.freedesktop.org Signed-off-by: Xiong, James --- src/mesa/drivers/dri/i965/intel_screen.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index dcb98da..29cb7ad 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -388,10 +388,16 @@ intel_image_format_lookup(int fourcc) return NULL; } -static boolean intel_lookup_fourcc(int dri_format, int *fourcc) +static boolean +intel_image_get_fourcc(__DRIimage *image, int *fourcc) { + if (image->planar_format) { + *fourcc = image->planar_format->fourcc; + return true; + } + for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) { - if (intel_image_formats[i].planes[0].dri_format == dri_format) { + if (intel_image_formats[i].planes[0].dri_format == image->dri_format) { *fourcc = intel_image_formats[i].fourcc; return true; } @@ -578,6 +584,7 @@ intel_create_image_from_texture(__DRIcontext *context, int target, intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset); image->dri_format = driGLFormatToImageFormat(image->format); image->has_depthstencil = iobj->mt->stencil_mt? true : false; + image->planar_format = iobj->planar_format; if (image->dri_format == MESA_FORMAT_NONE) { *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; free(image); @@ -869,7 +876,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value) case __DRI_IMAGE_ATTRIB_FD: return !brw_bo_gem_export_to_prime(image->bo, value); case __DRI_IMAGE_ATTRIB_FOURCC: - return intel_lookup_fourcc(image->dri_format, value); + return intel_image_get_fourcc(image, value); case __DRI_IMAGE_ATTRIB_NUM_PLANES: if (isl_drm_modifier_has_aux(image->modifier)) { assert(!image->planar_format || image->planar_format->nplanes == 1); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965: return the fourcc saved in __DRIimage
On Thu, 5 Apr 2018 20:57:46 +0300 Tapani Pälli wrote: > On 05.04.2018 18:43, James Xiong wrote: > > On Thu, 5 Apr 2018 14:30:02 +0300 > > Tapani Pälli wrote: > > > >> On 04/05/2018 02:51 AM, James Xiong wrote: > >>> From: "Xiong, James" > >>> > >>> The planar_format in __DRIimage contains the original fourcc > >>> used to create the image, if it's set, return the saved fourcc > >>> directly; Otherwise fall back to the old way. > >>> > >>> Also we should validate the input parameter "value" first as it > >>> might be NULL based on the SPEC. > >>> > >>> v2: fall back to intel_lookup_fourcc() when planar_format is NULL > >>> (by Dongwon & Matt Roper) > >>> > >>> Signed-off-by: Xiong, James > >>> --- > >>>src/mesa/drivers/dri/i965/intel_screen.c | 15 --- > >>>1 file changed, 12 insertions(+), 3 deletions(-) > >>> > >>> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c > >>> b/src/mesa/drivers/dri/i965/intel_screen.c index 7df8bc4..aeecef3 > >>> 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c > >>> +++ b/src/mesa/drivers/dri/i965/intel_screen.c > >>> @@ -388,10 +388,16 @@ intel_image_format_lookup(int fourcc) > >>> return NULL; > >>>} > >>> > >>> -static boolean intel_lookup_fourcc(int dri_format, int *fourcc) > >>> +static boolean > >>> +intel_image_get_fourcc(__DRIimage *image, int *fourcc) > >>>{ > >>> + if (image->planar_format) { > >>> + *fourcc = image->planar_format->fourcc; > >>> + return true; > >>> + } > >>> + > >>> for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); > >>> i++) { > >>> - if (intel_image_formats[i].planes[0].dri_format == > >>> dri_format) { > >>> + if (intel_image_formats[i].planes[0].dri_format == > >>> image->dri_format) { *fourcc = intel_image_formats[i].fourcc; > >>> return true; > >>> } > >>> @@ -844,6 +850,9 @@ intel_create_image_with_modifiers(__DRIscreen > >>> *dri_screen, static GLboolean > >>>intel_query_image(__DRIimage *image, int attrib, int *value) > >>>{ > >>> + if (value == NULL) > >>> + return false; > >>> + > >> > >> I would remove this check, we've been fine many years without it. > > The function spec does say: ", and > > may be NULL, in which case no value is retrieved." > > it's better to stick to it and have an extra check than > > segmentation fault, what do you say? > > Function modified here 'intel_query_image' is part of DRI interface > (dri_interface.h) with no such spec, it is used from many existing > places and none of them seem to be passing NULL. > > Now that I know you are using eglExportDMABUFImageMESA, I can also > see that dri2_export_dma_buf_image_mesa implementation does not call > this API when 'fds', 'strides' or 'offsets' is NULL, it checks this > in the implementation before calling. > You are right it's already been checked in the upper level. I will remove it. > > >> > >>> switch (attrib) { > >>> case __DRI_IMAGE_ATTRIB_STRIDE: > >>> *value = image->pitch; > >>> @@ -870,7 +879,7 @@ intel_query_image(__DRIimage *image, int > >>> attrib, int *value) case __DRI_IMAGE_ATTRIB_FD: > >>> return !brw_bo_gem_export_to_prime(image->bo, value); > >>> case __DRI_IMAGE_ATTRIB_FOURCC: > >>> - return intel_lookup_fourcc(image->dri_format, value); > >>> + return intel_image_get_fourcc(image, value); > >>> case __DRI_IMAGE_ATTRIB_NUM_PLANES: > >>> if (isl_drm_modifier_has_aux(image->modifier)) { > >>> assert(!image->planar_format || > >>> image->planar_format->nplanes == 1); > > > > // Tapani ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: init image's planar_format in intel_create_image_from_texture
On Thu, 5 Apr 2018 08:56:54 -0700 Jason Ekstrand wrote: > On Thu, Apr 5, 2018 at 8:45 AM, James Xiong > wrote: > > > On Thu, 5 Apr 2018 08:24:27 -0700 > > Jason Ekstrand wrote: > > > > > Does this fix a bug? If so, what? > > Jason, yes. I am sorry for the confusion, please see my earlier > > reply to Tapani. > > > > If it fixes a bug then the commit message should include a > description of the bug fixed. If the bug is a regression, it should > have a "Fixes: SHA1" tag indicating where the regression originated > and if it is a bug that has been around for a very long time, it > should have a "Cc: mesa-sta...@lists.freedesktop.org" tag. If the > bug is filed in bugzilla, it should have a "Bugzilla: " tag. > > --Jason Thanks for the tip, Jason. I will make the corresponding changes in the next version. > > > > > > > > > On April 4, 2018 16:57:21 James Xiong > > > wrote: > > > > From: "Xiong, James" > > > > > > > > When creating a image from a texture, initialize the image's > > > > planar_format with the texture's. > > > > > > > > Signed-off-by: Xiong, James > > > > --- > > > > src/mesa/drivers/dri/i965/intel_screen.c | 1 + > > > > 1 file changed, 1 insertion(+) > > > > > > > > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c > > > > b/src/mesa/drivers/dri/i965/intel_screen.c > > > > index dcb98da..7df8bc4 100644 > > > > --- a/src/mesa/drivers/dri/i965/intel_screen.c > > > > +++ b/src/mesa/drivers/dri/i965/intel_screen.c > > > > @@ -578,6 +578,7 @@ intel_create_image_from_texture(__DRIcontext > > > > *context, int target, > > > >intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, > > > > level, zoffset); image->dri_format = > > > > driGLFormatToImageFormat(image->format); > > > > image->has_depthstencil = iobj->mt->stencil_mt? true : false; > > > > + image->planar_format = iobj->planar_format; > > > >if (image->dri_format == MESA_FORMAT_NONE) { > > > > *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; > > > > free(image); > > > > -- > > > > 2.7.4 > > > > > > > > ___ > > > > mesa-dev mailing list > > > > mesa-dev@lists.freedesktop.org > > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > > > > > > > > > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: init image's planar_format in intel_create_image_from_texture
On Thu, 5 Apr 2018 08:24:27 -0700 Jason Ekstrand wrote: > Does this fix a bug? If so, what? Jason, yes. I am sorry for the confusion, please see my earlier reply to Tapani. > > On April 4, 2018 16:57:21 James Xiong wrote: > > > From: "Xiong, James" > > > > When creating a image from a texture, initialize the image's > > planar_format with the texture's. > > > > Signed-off-by: Xiong, James > > --- > > src/mesa/drivers/dri/i965/intel_screen.c | 1 + > > 1 file changed, 1 insertion(+) > > > > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c > > b/src/mesa/drivers/dri/i965/intel_screen.c > > index dcb98da..7df8bc4 100644 > > --- a/src/mesa/drivers/dri/i965/intel_screen.c > > +++ b/src/mesa/drivers/dri/i965/intel_screen.c > > @@ -578,6 +578,7 @@ intel_create_image_from_texture(__DRIcontext > > *context, int target, > >intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, > > zoffset); image->dri_format = > > driGLFormatToImageFormat(image->format); image->has_depthstencil = > > iobj->mt->stencil_mt? true : false; > > + image->planar_format = iobj->planar_format; > >if (image->dri_format == MESA_FORMAT_NONE) { > > *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; > > free(image); > > -- > > 2.7.4 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965: return the fourcc saved in __DRIimage
On Thu, 5 Apr 2018 14:30:02 +0300 Tapani Pälli wrote: > On 04/05/2018 02:51 AM, James Xiong wrote: > > From: "Xiong, James" > > > > The planar_format in __DRIimage contains the original fourcc > > used to create the image, if it's set, return the saved fourcc > > directly; Otherwise fall back to the old way. > > > > Also we should validate the input parameter "value" first as it > > might be NULL based on the SPEC. > > > > v2: fall back to intel_lookup_fourcc() when planar_format is NULL > >(by Dongwon & Matt Roper) > > > > Signed-off-by: Xiong, James > > --- > > src/mesa/drivers/dri/i965/intel_screen.c | 15 --- > > 1 file changed, 12 insertions(+), 3 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c > > b/src/mesa/drivers/dri/i965/intel_screen.c index 7df8bc4..aeecef3 > > 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c > > +++ b/src/mesa/drivers/dri/i965/intel_screen.c > > @@ -388,10 +388,16 @@ intel_image_format_lookup(int fourcc) > > return NULL; > > } > > > > -static boolean intel_lookup_fourcc(int dri_format, int *fourcc) > > +static boolean > > +intel_image_get_fourcc(__DRIimage *image, int *fourcc) > > { > > + if (image->planar_format) { > > + *fourcc = image->planar_format->fourcc; > > + return true; > > + } > > + > > for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) > > { > > - if (intel_image_formats[i].planes[0].dri_format == > > dri_format) { > > + if (intel_image_formats[i].planes[0].dri_format == > > image->dri_format) { *fourcc = intel_image_formats[i].fourcc; > >return true; > > } > > @@ -844,6 +850,9 @@ intel_create_image_with_modifiers(__DRIscreen > > *dri_screen, static GLboolean > > intel_query_image(__DRIimage *image, int attrib, int *value) > > { > > + if (value == NULL) > > + return false; > > + > > I would remove this check, we've been fine many years without it. The function spec does say: ", and may be NULL, in which case no value is retrieved." it's better to stick to it and have an extra check than segmentation fault, what do you say? > > > switch (attrib) { > > case __DRI_IMAGE_ATTRIB_STRIDE: > > *value = image->pitch; > > @@ -870,7 +879,7 @@ intel_query_image(__DRIimage *image, int > > attrib, int *value) case __DRI_IMAGE_ATTRIB_FD: > > return !brw_bo_gem_export_to_prime(image->bo, value); > > case __DRI_IMAGE_ATTRIB_FOURCC: > > - return intel_lookup_fourcc(image->dri_format, value); > > + return intel_image_get_fourcc(image, value); > > case __DRI_IMAGE_ATTRIB_NUM_PLANES: > > if (isl_drm_modifier_has_aux(image->modifier)) { > >assert(!image->planar_format || > > image->planar_format->nplanes == 1); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/2] wrong fourcc was returned for imported images
On Thu, 5 Apr 2018 15:05:26 +0300 Tapani Pälli wrote:
> Hi;
>
> On 04/05/2018 02:51 AM, James Xiong wrote:
> > From: "Xiong, James"
> >
> > The importer creates an image out of the imported FOURCC_NV12
> > texture; the image's dri_format is set to R8 (same as the first
> > plane's format). When it queries the image's fourcc, mesa goes
> > through the intel_image_formats table and returns FOURCC_R8.
>
> Could you explain the use case a bit, I'm not sure I understand how
> this happens. First someone imports dmabufs, creating EGLImage. Then
> a texture is associated with that (glEGLImageTargetTexture2DOES?), is
> it so that then we create yet another EGLImage from that texture or
> am I completely lost here? :)

Sorry for the confusion. What actually happened was that the exporter
1) calls eglCreateImageKHR() to create an image from an NV12 texture,
and 2) calls eglExportDMABUFImageQueryMESA(), but FOURCC_R8 is returned
instead of FOURCC_NV12.

Essentially, image->dri_format is the first plane's format, i.e.
__DRI_IMAGE_FORMAT_R8 in our case. When querying the fourcc,
intel_lookup_fourcc() gets the following entry from the
intel_image_formats table:

   { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_COMPONENTS_R, 1,
     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } },

instead of the correct one:

   { __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },

To fix this, the image should inherit the texture's planar_format,
which contains the original fourcc. When the user application queries
the fourcc, we can return the saved fourcc if planar_format is set;
otherwise fall back to the original way of looking up the fourcc.

> > The solution is to 1) set the image's planar_format using the
> > texture's in function intel_create_image_from_texture(), and 2) when
> > queried, return the fourcc saved in planar_format.
> >
> > Xiong, James (2):
> >   i965: init image's planar_format in intel_create_image_from_texture
> >   i965: return the fourcc saved in __DRIimage
> >
> >  src/mesa/drivers/dri/i965/intel_screen.c | 16 +---
> >  1 file changed, 13 insertions(+), 3 deletions(-)

___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
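[Editorial note] For context, a rough C sketch of the exporter flow described above, using the EGL_MESA_image_dma_buf_export query path. Display/context creation, extension checks, and the NV12 GL texture "tex" are assumed to exist already; error handling is omitted. This is illustration only, not part of the patch.

/* Create an EGLImage from an (NV12) texture, then query its fourcc via
 * EGL_MESA_image_dma_buf_export. */
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <GLES2/gl2.h>
#include <stdint.h>

static int
export_image_fourcc(EGLDisplay dpy, EGLContext ctx, GLuint tex)
{
   PFNEGLCREATEIMAGEKHRPROC create_image =
      (PFNEGLCREATEIMAGEKHRPROC)eglGetProcAddress("eglCreateImageKHR");
   PFNEGLEXPORTDMABUFIMAGEQUERYMESAPROC export_query =
      (PFNEGLEXPORTDMABUFIMAGEQUERYMESAPROC)
         eglGetProcAddress("eglExportDMABUFImageQueryMESA");

   /* 1) Create an EGLImage from the texture. */
   EGLImageKHR img = create_image(dpy, ctx, EGL_GL_TEXTURE_2D_KHR,
                                  (EGLClientBuffer)(uintptr_t)tex, NULL);

   /* 2) Query the image's fourcc.  Without the planar_format fix this
    *    returned the first plane's fourcc (R8) instead of NV12. */
   int fourcc = 0, num_planes = 0;
   export_query(dpy, img, &fourcc, &num_planes, NULL);

   return fourcc;
}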
[Mesa-dev] [PATCH 1/2] i965: init image's planar_format in intel_create_image_from_texture
From: "Xiong, James" When creating a image from a texture, initialize the image's planar_format with the texture's. Signed-off-by: Xiong, James --- src/mesa/drivers/dri/i965/intel_screen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index dcb98da..7df8bc4 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -578,6 +578,7 @@ intel_create_image_from_texture(__DRIcontext *context, int target, intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset); image->dri_format = driGLFormatToImageFormat(image->format); image->has_depthstencil = iobj->mt->stencil_mt? true : false; + image->planar_format = iobj->planar_format; if (image->dri_format == MESA_FORMAT_NONE) { *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; free(image); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/2] wrong fourcc was returned for imported images
From: "Xiong, James" The importer creates an image out of the imported FOURCC_NV12 texture, the image's dri_format is set to R8(same as the first plane's format), when it queries the image's fourcc, mesa goes through intel_image_format table and returns FOURCC_R8. The solution is to 1) set the image's planar_format using the texture's in function intel_create_image_from_texture(). 2) when queried, return the saved fourcc@planar_format Xiong, James (2): i965: init image's planar_format in intel_create_image_from_texture i965: return the fourcc saved in __DRIimage src/mesa/drivers/dri/i965/intel_screen.c | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] i965: return the fourcc saved in __DRIimage
From: "Xiong, James" The planar_format in __DRIimage contains the original fourcc used to create the image, if it's set, return the saved fourcc directly; Otherwise fall back to the old way. Also we should validate the input parameter "value" first as it might be NULL based on the SPEC. v2: fall back to intel_lookup_fourcc() when planar_format is NULL (by Dongwon & Matt Roper) Signed-off-by: Xiong, James --- src/mesa/drivers/dri/i965/intel_screen.c | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 7df8bc4..aeecef3 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -388,10 +388,16 @@ intel_image_format_lookup(int fourcc) return NULL; } -static boolean intel_lookup_fourcc(int dri_format, int *fourcc) +static boolean +intel_image_get_fourcc(__DRIimage *image, int *fourcc) { + if (image->planar_format) { + *fourcc = image->planar_format->fourcc; + return true; + } + for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) { - if (intel_image_formats[i].planes[0].dri_format == dri_format) { + if (intel_image_formats[i].planes[0].dri_format == image->dri_format) { *fourcc = intel_image_formats[i].fourcc; return true; } @@ -844,6 +850,9 @@ intel_create_image_with_modifiers(__DRIscreen *dri_screen, static GLboolean intel_query_image(__DRIimage *image, int attrib, int *value) { + if (value == NULL) + return false; + switch (attrib) { case __DRI_IMAGE_ATTRIB_STRIDE: *value = image->pitch; @@ -870,7 +879,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value) case __DRI_IMAGE_ATTRIB_FD: return !brw_bo_gem_export_to_prime(image->bo, value); case __DRI_IMAGE_ATTRIB_FOURCC: - return intel_lookup_fourcc(image->dri_format, value); + return intel_image_get_fourcc(image, value); case __DRI_IMAGE_ATTRIB_NUM_PLANES: if (isl_drm_modifier_has_aux(image->modifier)) { assert(!image->planar_format || image->planar_format->nplanes == 1); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev