Module: Mesa Branch: main Commit: 782efa29e6522ffd4d871074f65cae8485cf2a84 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=782efa29e6522ffd4d871074f65cae8485cf2a84
Author: Paulo Zanoni <[email protected]> Date: Tue May 4 17:42:05 2021 -0700 iris: have a single border color pool per bufmgr Have a single border color pool per bufmgr instead of per context. We want to have a single VM shared among every context and the border color pool is the last feature preventing us from having that. Previously we had 1024 colors per context but once the buffer was full we just waited for the buffer to be unused and restarted it. After this patch we have 4096 colors shared by all contexts and we can't just flush buffers if they are full, so we simply return black. There are many strategies we could try to implement to help alleviate this new 4096 limit, none of which are implemented by this patch: - We could just expand the buffer to the full 16MB we can use, allowing 262144 colors. - We could use multiple buffers and make the contexts refcount them, so eventually older buffers would reach zero references and be recycled, moving us from a lifetime maximum to a working set maximum. - We could also make the border color pool be a standard memzone and then give smaller buffers to each context when they need them, so the limit would be in the number of contexts that can use border color pools. This was my first implementation but Ken suggested I switch to the one provided by this patch, which is simpler. Keep it like this since border colors don't seem to be used very much and other Mesa drivers such as radeonsi also seem to employ the "return black once we reach the limit" strategy. As a last note, we could also move the contents of iris_border_color.c to iris_bufmgr.c in order to avoid breaking some abstractions we have in Iris, like we do with iris_bufmgr_get_border_color_pool(). I can do this in case we want it. v2: Switch from standard memzone to a per-screen thing (see above). v3: Actually make it per bufmgr. Just making it per screen is not enough, since screens can share the same VM, an example being the gputest benchmark suite. v4: Rebase. 
v5: Remove dead code, lock around hash table lookup (Ken). v6: Simple rebase. v7: Another rebase (for_each_batch). Reviewed-by: Kenneth Graunke <[email protected]> Signed-off-by: Paulo Zanoni <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12028> --- src/gallium/drivers/iris/iris_border_color.c | 86 ++++++++++++---------------- src/gallium/drivers/iris/iris_bufmgr.c | 12 ++++ src/gallium/drivers/iris/iris_bufmgr.h | 29 +++++++++- src/gallium/drivers/iris/iris_context.c | 2 - src/gallium/drivers/iris/iris_context.h | 24 -------- src/gallium/drivers/iris/iris_state.c | 34 ++++++----- 6 files changed, 95 insertions(+), 92 deletions(-) diff --git a/src/gallium/drivers/iris/iris_border_color.c b/src/gallium/drivers/iris/iris_border_color.c index 17097a6b0f6..e7f1015c60c 100644 --- a/src/gallium/drivers/iris/iris_border_color.c +++ b/src/gallium/drivers/iris/iris_border_color.c @@ -39,7 +39,10 @@ * table of known colors, and reuse the same entries. This avoids * wasting a lot of space in the pool. * - * If it ever does fill up, we simply flush. + * If it ever does fill up, we simply return the black border. We + * can't simply flush since the BO is shared by every context. If we + * ever need we may choose to have multiple BOs, refcount them and + * then recycle when unused. 
*/ #include <stdlib.h> @@ -49,6 +52,7 @@ #include "iris_context.h" #define BC_ALIGNMENT 64 +#define BC_BLACK 64 static bool color_equals(const void *a, const void *b) @@ -62,13 +66,13 @@ color_hash(const void *key) return _mesa_hash_data(key, sizeof(union pipe_color_union)); } -static void -iris_reset_border_color_pool(struct iris_border_color_pool *pool, - struct iris_bufmgr *bufmgr) +void +iris_init_border_color_pool(struct iris_bufmgr *bufmgr, + struct iris_border_color_pool *pool) { - _mesa_hash_table_clear(pool->ht, NULL); + simple_mtx_init(&pool->lock, mtx_plain); - iris_bo_unreference(pool->bo); + pool->ht = _mesa_hash_table_create(NULL, color_hash, color_equals); pool->bo = iris_bo_alloc(bufmgr, "border colors", IRIS_BORDER_COLOR_POOL_SIZE, 1, @@ -77,50 +81,18 @@ iris_reset_border_color_pool(struct iris_border_color_pool *pool, /* Don't make 0 a valid offset - tools treat that as a NULL pointer. */ pool->insert_point = BC_ALIGNMENT; -} - -void -iris_init_border_color_pool(struct iris_context *ice) -{ - struct iris_screen *screen = (void *) ice->ctx.screen; - struct iris_bufmgr *bufmgr = screen->bufmgr; - struct iris_border_color_pool *pool = &ice->state.border_color_pool; - - pool->bo = NULL; - pool->ht = _mesa_hash_table_create(ice, color_hash, color_equals); - - iris_reset_border_color_pool(pool, bufmgr); + union pipe_color_union black = {.f = { 0.0, 0.0, 0.0, 1.0 }}; + ASSERTED uint32_t black_offset = iris_upload_border_color(pool, &black); + assert(black_offset == BC_BLACK); } void -iris_destroy_border_color_pool(struct iris_context *ice) +iris_destroy_border_color_pool(struct iris_border_color_pool *pool) { - struct iris_border_color_pool *pool = &ice->state.border_color_pool; iris_bo_unreference(pool->bo); ralloc_free(pool->ht); -} - -/** - * Reserve space for a number of border colors. If no space, flushes any - * batches that are referring to the old BO and makes a new one. 
- */ -void -iris_border_color_pool_reserve(struct iris_context *ice, unsigned count) -{ - struct iris_border_color_pool *pool = &ice->state.border_color_pool; - const unsigned remaining_entries = - (IRIS_BORDER_COLOR_POOL_SIZE - pool->insert_point) / BC_ALIGNMENT; - - if (remaining_entries < count) { - /* It's safe to flush because we're called outside of state upload. */ - iris_foreach_batch(ice, batch) { - if (iris_batch_references(batch, pool->bo)) - iris_batch_flush(batch); - } - - iris_reset_border_color_pool(pool, pool->bo->bufmgr); - } + simple_mtx_destroy(&pool->lock); } /** @@ -130,25 +102,39 @@ iris_border_color_pool_reserve(struct iris_context *ice, unsigned count) * reserve space ahead of time by calling iris_border_color_pool_reserve(). */ uint32_t -iris_upload_border_color(struct iris_context *ice, +iris_upload_border_color(struct iris_border_color_pool *pool, union pipe_color_union *color) { - struct iris_border_color_pool *pool = &ice->state.border_color_pool; - + uint32_t offset; uint32_t hash = color_hash(color); + + simple_mtx_lock(&pool->lock); + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(pool->ht, hash, color); - if (entry) - return (uintptr_t) entry->data; + if (entry) { + offset = (uintptr_t) entry->data; + goto out; + } - assert(pool->insert_point + BC_ALIGNMENT < IRIS_BORDER_COLOR_POOL_SIZE); + if (pool->insert_point + BC_ALIGNMENT > IRIS_BORDER_COLOR_POOL_SIZE) { + static bool warned = false; + if (!warned) { + fprintf(stderr, "Border color pool is full. 
Using black instead.\n"); + warned = true; + } + offset = BC_BLACK; + goto out; + } - uint32_t offset = pool->insert_point; + offset = pool->insert_point; memcpy(pool->map + offset, color, sizeof(*color)); pool->insert_point += BC_ALIGNMENT; _mesa_hash_table_insert_pre_hashed(pool->ht, hash, pool->map + offset, (void *) (uintptr_t) offset); +out: + simple_mtx_unlock(&pool->lock); return offset; } diff --git a/src/gallium/drivers/iris/iris_bufmgr.c b/src/gallium/drivers/iris/iris_bufmgr.c index 34bb14be754..7f68ed29501 100644 --- a/src/gallium/drivers/iris/iris_bufmgr.c +++ b/src/gallium/drivers/iris/iris_bufmgr.c @@ -238,6 +238,8 @@ struct iris_bufmgr { struct intel_aux_map_context *aux_map_ctx; struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS]; + + struct iris_border_color_pool border_color_pool; }; static simple_mtx_t global_bufmgr_list_mutex = _SIMPLE_MTX_INITIALIZER_NP; @@ -1727,6 +1729,8 @@ iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns) static void iris_bufmgr_destroy(struct iris_bufmgr *bufmgr) { + iris_destroy_border_color_pool(&bufmgr->border_color_pool); + /* Free aux-map buffers */ intel_aux_map_finish(bufmgr->aux_map_ctx); @@ -2448,6 +2452,8 @@ iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse) assert(bufmgr->aux_map_ctx); } + iris_init_border_color_pool(bufmgr, &bufmgr->border_color_pool); + return bufmgr; } @@ -2531,3 +2537,9 @@ iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr) { return &bufmgr->bo_deps_lock; } + +struct iris_border_color_pool * +iris_bufmgr_get_border_color_pool(struct iris_bufmgr *bufmgr) +{ + return &bufmgr->border_color_pool; +} diff --git a/src/gallium/drivers/iris/iris_bufmgr.h b/src/gallium/drivers/iris/iris_bufmgr.h index 1d9259f715c..df817077aba 100644 --- a/src/gallium/drivers/iris/iris_bufmgr.h +++ b/src/gallium/drivers/iris/iris_bufmgr.h @@ -94,7 +94,7 @@ enum iris_memory_zone { #define IRIS_MEMZONE_OTHER_START (3ull * (1ull << 32)) #define IRIS_BORDER_COLOR_POOL_ADDRESS 
IRIS_MEMZONE_DYNAMIC_START -#define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024) +#define IRIS_BORDER_COLOR_POOL_SIZE (64 * 4096) /** * Classification of the various incoherent caches of the GPU into a number of @@ -552,4 +552,31 @@ int iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr); simple_mtx_t *iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr); +/** + * A pool containing SAMPLER_BORDER_COLOR_STATE entries. + * + * See iris_border_color.c for more information. + */ +struct iris_border_color_pool { + struct iris_bo *bo; + void *map; + unsigned insert_point; + + /** Map from border colors to offsets in the buffer. */ + struct hash_table *ht; + + /** Protects insert_point and the hash table. */ + simple_mtx_t lock; +}; + +struct iris_border_color_pool *iris_bufmgr_get_border_color_pool( + struct iris_bufmgr *bufmgr); + +/* iris_border_color.c */ +void iris_init_border_color_pool(struct iris_bufmgr *bufmgr, + struct iris_border_color_pool *pool); +void iris_destroy_border_color_pool(struct iris_border_color_pool *pool); +uint32_t iris_upload_border_color(struct iris_border_color_pool *pool, + union pipe_color_union *color); + #endif /* IRIS_BUFMGR_H */ diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c index b91f6f8ef8b..e2ea72827ad 100644 --- a/src/gallium/drivers/iris/iris_context.c +++ b/src/gallium/drivers/iris/iris_context.c @@ -233,7 +233,6 @@ iris_destroy_context(struct pipe_context *ctx) pipe_resource_reference(&ice->shaders.scratch_surfs[i].res, NULL); iris_destroy_program_cache(ice); - iris_destroy_border_color_pool(ice); if (screen->measure.config) iris_destroy_ctx_measure(ice); @@ -329,7 +328,6 @@ iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags) iris_init_perfquery_functions(ctx); iris_init_program_cache(ice); - iris_init_border_color_pool(ice); iris_init_binder(ice); slab_create_child(&ice->transfer_pool, &screen->transfer_pool); diff --git 
a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 69504dc52f3..422bd10a587 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -572,20 +572,6 @@ struct iris_stream_output_target { bool zero_offset; }; -/** - * A pool containing SAMPLER_BORDER_COLOR_STATE entries. - * - * See iris_border_color.c for more information. - */ -struct iris_border_color_pool { - struct iris_bo *bo; - void *map; - unsigned insert_point; - - /** Map from border colors to offsets in the buffer. */ - struct hash_table *ht; -}; - /** * The API context (derived from pipe_context). * @@ -815,8 +801,6 @@ struct iris_context { struct iris_binder binder; - struct iris_border_color_pool border_color_pool; - /** The high 16-bits of the last VBO/index buffer addresses */ uint16_t last_vbo_high_bits[33]; uint16_t last_index_bo_high_bits; @@ -936,14 +920,6 @@ void iris_flush_all_caches(struct iris_batch *batch); void iris_init_flush_functions(struct pipe_context *ctx); -/* iris_border_color.c */ - -void iris_init_border_color_pool(struct iris_context *ice); -void iris_destroy_border_color_pool(struct iris_context *ice); -void iris_border_color_pool_reserve(struct iris_context *ice, unsigned count); -uint32_t iris_upload_border_color(struct iris_context *ice, - union pipe_color_union *color); - /* iris_program.c */ void iris_upload_ubo_ssbo_surf_state(struct iris_context *ice, struct pipe_shader_buffer *buf, diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 644dc7199c7..e7765abb6aa 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -2161,10 +2161,11 @@ iris_bind_sampler_states(struct pipe_context *ctx, static void iris_upload_sampler_states(struct iris_context *ice, gl_shader_stage stage) { - UNUSED struct iris_screen *screen = (void *) ice->ctx.screen; - UNUSED const struct intel_device_info *devinfo = &screen->devinfo; + 
struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen; struct iris_shader_state *shs = &ice->state.shaders[stage]; const struct shader_info *info = iris_get_shader_info(ice, stage); + struct iris_border_color_pool *border_color_pool = + iris_bufmgr_get_border_color_pool(screen->bufmgr); /* We assume gallium frontends will call pipe->bind_sampler_states() * if the program's number of textures changes. @@ -2192,9 +2193,6 @@ iris_upload_sampler_states(struct iris_context *ice, gl_shader_stage stage) shs->sampler_table.offset += iris_bo_offset_from_base_address(bo); - /* Make sure all land in the same BO */ - iris_border_color_pool_reserve(ice, IRIS_MAX_TEXTURE_SAMPLERS); - ice->state.need_border_colors &= ~(1 << stage); for (int i = 0; i < count; i++) { @@ -2245,7 +2243,8 @@ iris_upload_sampler_states(struct iris_context *ice, gl_shader_stage stage) } /* Stream out the border color and merge the pointer. */ - uint32_t offset = iris_upload_border_color(ice, color); + uint32_t offset = iris_upload_border_color(border_color_pool, + color); uint32_t dynamic[GENX(SAMPLER_STATE_length)]; iris_pack_state(GENX(SAMPLER_STATE), dynamic, dyns) { @@ -5763,6 +5762,9 @@ iris_upload_dirty_render_state(struct iris_context *ice, struct iris_batch *batch, const struct pipe_draw_info *draw) { + struct iris_screen *screen = batch->screen; + struct iris_border_color_pool *border_color_pool = + iris_bufmgr_get_border_color_pool(screen->bufmgr); const uint64_t dirty = ice->state.dirty; const uint64_t stage_dirty = ice->state.stage_dirty; @@ -5868,8 +5870,8 @@ iris_upload_dirty_render_state(struct iris_context *ice, assert(ice->shaders.urb.size[i] != 0); } - intel_get_urb_config(&batch->screen->devinfo, - batch->screen->l3_config_3d, + intel_get_urb_config(&screen->devinfo, + screen->l3_config_3d, ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL, ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL, ice->shaders.urb.size, @@ -6071,8 +6073,7 @@ iris_upload_dirty_render_state(struct 
iris_context *ice, } if (ice->state.need_border_colors) - iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false, - IRIS_DOMAIN_NONE); + iris_use_pinned_bo(batch, border_color_pool->bo, false, IRIS_DOMAIN_NONE); if (dirty & IRIS_DIRTY_MULTISAMPLE) { iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) { @@ -6468,8 +6469,8 @@ iris_upload_dirty_render_state(struct iris_context *ice, */ iris_emit_pipe_control_write(batch, "WA for stencil state", PIPE_CONTROL_WRITE_IMMEDIATE, - batch->screen->workaround_address.bo, - batch->screen->workaround_address.offset, 0); + screen->workaround_address.bo, + screen->workaround_address.offset, 0); } } @@ -6519,7 +6520,7 @@ iris_upload_dirty_render_state(struct iris_context *ice, vb.BufferStartingAddress = ro_bo(NULL, res->bo->address + (int) ice->draw.draw_params.offset); - vb.MOCS = iris_mocs(res->bo, &batch->screen->isl_dev, + vb.MOCS = iris_mocs(res->bo, &screen->isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT); #if GFX_VER >= 12 vb.L3BypassDisable = true; @@ -6545,7 +6546,7 @@ iris_upload_dirty_render_state(struct iris_context *ice, vb.BufferStartingAddress = ro_bo(NULL, res->bo->address + (int) ice->draw.derived_draw_params.offset); - vb.MOCS = iris_mocs(res->bo, &batch->screen->isl_dev, + vb.MOCS = iris_mocs(res->bo, &screen->isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT); #if GFX_VER >= 12 vb.L3BypassDisable = true; @@ -7281,10 +7282,13 @@ iris_upload_compute_state(struct iris_context *ice, struct iris_batch *batch, const struct pipe_grid_info *grid) { + struct iris_screen *screen = batch->screen; const uint64_t stage_dirty = ice->state.stage_dirty; struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE]; struct iris_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_COMPUTE]; + struct iris_border_color_pool *border_color_pool = + iris_bufmgr_get_border_color_pool(screen->bufmgr); iris_batch_sync_region_start(batch); @@ -7312,7 +7316,7 @@ iris_upload_compute_state(struct iris_context *ice, 
IRIS_DOMAIN_NONE); if (ice->state.need_border_colors) - iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false, + iris_use_pinned_bo(batch, border_color_pool->bo, false, IRIS_DOMAIN_NONE); #if GFX_VER >= 12
