From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/auxiliary/util/u_threaded_context.c | 2 +- src/gallium/drivers/freedreno/freedreno_batch.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 2 +- src/util/disk_cache.c | 2 +- src/util/u_queue.c | 19 ++++++++++++++++++- src/util/u_queue.h | 6 +++++- 8 files changed, 29 insertions(+), 8 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index 71211e6..554cc88 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -2194,21 +2194,21 @@ threaded_context_create(struct pipe_context *pipe, else tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader); if (!tc->base.stream_uploader || !tc->base.const_uploader) goto fail; /* The queue size is the number of batches "waiting". Batches are removed * from the queue before being executed, so keep one tc_batch slot for that * execution. Also, keep one unused slot for an unflushed batch. */ - if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1)) + if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1, 0)) goto fail; for (unsigned i = 0; i < TC_MAX_BATCHES; i++) { tc->batch_slots[i].sentinel = TC_SENTINEL; tc->batch_slots[i].pipe = pipe; util_queue_fence_init(&tc->batch_slots[i].fence); } LIST_INITHEAD(&tc->unflushed_queries); diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index 5783ee8..33b6240 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -265,21 +265,21 @@ batch_flush(struct fd_batch *batch) fd_batch_set_stage(batch, FD_STAGE_NULL); fd_context_all_dirty(batch->ctx); batch_flush_reset_dependencies(batch, true); if (batch->ctx->screen->reorder) { struct fd_batch *tmp = NULL; fd_batch_reference(&tmp, batch); if (!util_queue_is_initialized(&batch->ctx->flush_queue)) - util_queue_init(&batch->ctx->flush_queue, "flush_queue", 16, 1); + util_queue_init(&batch->ctx->flush_queue, "flush_queue", 16, 1, 0); util_queue_add_job(&batch->ctx->flush_queue, batch, &batch->flush_fence, batch_flush_func, batch_cleanup_func); } else { fd_gmem_render_tiles(batch); batch_reset_resources(batch); } 
debug_assert(batch->reference.count > 0); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 4704304..8bf6fd9 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -882,21 +882,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) } /* Only enable as many threads as we have target machines, but at most * the number of CPUs - 1 if there is more than one. */ num_cpus = sysconf(_SC_NPROCESSORS_ONLN); num_cpus = MAX2(1, num_cpus - 1); num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm)); if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader", - 32, num_compiler_threads)) { + 32, num_compiler_threads, 0)) { si_destroy_shader_cache(sscreen); FREE(sscreen); return NULL; } si_handle_env_var_force_family(sscreen); if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) si_init_perfcounters(sscreen); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index c8bd60e..43f2ed2 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -298,21 +298,21 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create) ws->base.read_registers = amdgpu_read_registers; amdgpu_bo_init_functions(ws); amdgpu_cs_init_functions(ws); amdgpu_surface_init_functions(ws); LIST_INITHEAD(&ws->global_bo_list); (void) mtx_init(&ws->global_bo_list_lock, mtx_plain); (void) mtx_init(&ws->bo_fence_lock, mtx_plain); - if (!util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1)) { + if (!util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1, 0)) { amdgpu_winsys_destroy(&ws->base); mtx_unlock(&dev_tab_mutex); return NULL; } /* Create the screen at the end. The winsys must be initialized * completely. * * Alternatively, we could create the screen based on "ws->gen" * and link all drivers into one binary blob. 
*/ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index a485615..9bbffa5 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -814,21 +814,21 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create) (void) mtx_init(&ws->bo_handles_mutex, mtx_plain); (void) mtx_init(&ws->bo_va_mutex, mtx_plain); (void) mtx_init(&ws->bo_fence_lock, mtx_plain); ws->va_offset = ws->va_start; list_inithead(&ws->va_holes); /* TTM aligns the BO size to the CPU page size */ ws->info.gart_page_size = sysconf(_SC_PAGESIZE); if (ws->num_cpus > 1 && debug_get_option_thread()) - util_queue_init(&ws->cs_queue, "radeon_cs", 8, 1); + util_queue_init(&ws->cs_queue, "radeon_cs", 8, 1, 0); /* Create the screen at the end. The winsys must be initialized * completely. * * Alternatively, we could create the screen based on "ws->gen" * and link all drivers into one binary blob. */ ws->base.screen = screen_create(&ws->base); if (!ws->base.screen) { radeon_winsys_destroy(&ws->base); mtx_unlock(&fd_tab_mutex); diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c index 138d7ec..b222987 100644 --- a/src/util/disk_cache.c +++ b/src/util/disk_cache.c @@ -335,21 +335,21 @@ disk_cache_create(const char *gpu_name, const char *timestamp, } cache->max_size = max_size; /* A limit of 32 jobs was choosen as observations of Deus Ex start-up times * showed that we reached at most 11 jobs on an Intel i5-6400 CPU@2.70GHz * (a fairly modest desktop CPU). 1 thread was chosen because we don't * really care about getting things to disk quickly just that it's not * blocking other tasks. 
*/ - util_queue_init(&cache->cache_queue, "disk_cache", 32, 1); + util_queue_init(&cache->cache_queue, "disk_cache", 32, 1, 0); /* Create driver id keys */ size_t ts_size = strlen(timestamp) + 1; size_t gpu_name_size = strlen(gpu_name) + 1; cache->driver_keys_blob_size = ts_size; cache->driver_keys_blob_size += gpu_name_size; /* We sometimes store entire structs that contains a pointers in the cache, * use pointer size as a key to avoid hard to debug issues. */ diff --git a/src/util/u_queue.c b/src/util/u_queue.c index 99de34c..32edb5e 100644 --- a/src/util/u_queue.c +++ b/src/util/u_queue.c @@ -140,20 +140,35 @@ util_queue_thread_func(void *input) int thread_index = ((struct thread_input*)input)->thread_index; free(input); if (queue->name) { char name[16]; util_snprintf(name, sizeof(name), "%s:%i", queue->name, thread_index); u_thread_setname(name); } + if (queue->flags & UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY) { +#if defined(__linux__) + struct sched_param sched_param = {0}; + + /* The nice() function can only set a maximum of 19. + * SCHED_IDLE is the same as nice = 20. + * + * Note that Linux only allows decreasing the priority. The original + * priority can't be restored. 
+ */ + pthread_setschedparam(pthread_self(), SCHED_IDLE, + &sched_param); +#endif + } + while (1) { struct util_queue_job job; mtx_lock(&queue->lock); assert(queue->num_queued >= 0 && queue->num_queued <= queue->max_jobs); /* wait if the queue is empty */ while (!queue->kill_threads && queue->num_queued == 0) cnd_wait(&queue->has_queued_cond, &queue->lock); @@ -190,27 +205,29 @@ util_queue_thread_func(void *input) queue->read_idx = (queue->read_idx + queue->num_queued) % queue->max_jobs; queue->num_queued = 0; mtx_unlock(&queue->lock); return 0; } bool util_queue_init(struct util_queue *queue, const char *name, unsigned max_jobs, - unsigned num_threads) + unsigned num_threads, + unsigned flags) { unsigned i; memset(queue, 0, sizeof(*queue)); queue->name = name; queue->num_threads = num_threads; + queue->flags = flags; queue->max_jobs = max_jobs; queue->jobs = (struct util_queue_job*) calloc(max_jobs, sizeof(struct util_queue_job)); if (!queue->jobs) goto fail; (void) mtx_init(&queue->lock, mtx_plain); queue->num_queued = 0; diff --git a/src/util/u_queue.h b/src/util/u_queue.h index 9876865..916802c 100644 --- a/src/util/u_queue.h +++ b/src/util/u_queue.h @@ -35,20 +35,22 @@ #include <string.h> #include "util/list.h" #include "util/u_thread.h" #ifdef __cplusplus extern "C" { #endif +#define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY (1 << 0) + /* Job completion fence. * Put this into your job structure. */ struct util_queue_fence { mtx_t mutex; cnd_t cond; int signalled; }; typedef void (*util_queue_execute_func)(void *job, int thread_index); @@ -62,33 +64,35 @@ struct util_queue_job { /* Put this into your context. 
*/ struct util_queue { const char *name; mtx_t lock; cnd_t has_queued_cond; cnd_t has_space_cond; thrd_t *threads; int num_queued; unsigned num_threads; + unsigned flags; int kill_threads; int max_jobs; int write_idx, read_idx; /* ring buffer pointers */ struct util_queue_job *jobs; /* for cleanup at exit(), protected by exit_mutex */ struct list_head head; }; bool util_queue_init(struct util_queue *queue, const char *name, unsigned max_jobs, - unsigned num_threads); + unsigned num_threads, + unsigned flags); void util_queue_destroy(struct util_queue *queue); void util_queue_fence_init(struct util_queue_fence *fence); void util_queue_fence_destroy(struct util_queue_fence *fence); /* optional cleanup callback is called after fence is signaled: */ void util_queue_add_job(struct util_queue *queue, void *job, struct util_queue_fence *fence, util_queue_execute_func execute, util_queue_execute_func cleanup); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev