When beignet added out-of-order execution support (7fd45f15), it made *all* command queues out-of-order, even if they were created as (and are reported by clGetCommandQueueInfo as) in-order.
Signed-off-by: Rebecca N. Palmer <rebecca_pal...@zoho.com> --- Not sure whether this one is actually worth it: it's clearly against the spec, but I'm not aware of it causing any real-world bugs. (I noticed it while investigating an issue that turned out to be unrelated.) Users who expect a queue to be in-order are probably not using events, and that makes a beignet queue effectively in-order. (This is *not* true of out-of-order queues in some other ICDs, e.g. pocl: it is true in Beignet because our flush (in particular the implicit one before a blocking copy) is also an ordering barrier, but the spec doesn't require that. If you choose not to take this, it might be a good idea to add a comment to cl_command_queue_wait_flush documenting that.) --- a/src/cl_api.c +++ b/src/cl_api.c @@ -283,7 +283,7 @@ clEnqueueSVMFree (cl_command_queue comma data->size = num_svm_pointers; data->ptr = user_data; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -429,7 +429,7 @@ cl_int clEnqueueSVMMemcpy (cl_command_qu data->const_ptr = src_ptr; data->size = size; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -441,6 +441,9 @@ cl_int clEnqueueSVMMemcpy (cl_command_qu break; } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_copy) { + cl_event_wait_for_events_list(1, &e); + } } } while(0); @@ -518,7 +521,7 @@ cl_int clEnqueueSVMMemFill (cl_command_q data->pattern_size = pattern_size; data->size = size; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { --- a/src/cl_api_kernel.c +++ b/src/cl_api_kernel.c @@ -223,6 +223,7 @@ clEnqueueNDRangeKernel(cl_command_queue count *= global_wk_sz_rem[2] ? 2 : 1; const size_t *global_wk_all[2] = {global_wk_sz_div, global_wk_sz_rem}; + cl_bool allow_immediate_submit = cl_command_queue_allow_bypass_submit(command_queue); /* Go through the at most 8 cases and euque if there is work items left */ for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { @@ -263,7 +264,7 @@ clEnqueueNDRangeKernel(cl_command_queue break; } - err = cl_event_exec(e, (event_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED), CL_FALSE); + err = cl_event_exec(e, ((allow_immediate_submit && event_status == CL_COMPLETE) ? CL_SUBMITTED : CL_QUEUED), CL_FALSE); if (err != CL_SUCCESS) { break; } --- a/src/cl_api_mem.c +++ b/src/cl_api_mem.c @@ -309,7 +309,7 @@ clEnqueueMapBuffer(cl_command_queue comm if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) data->write_map = 1; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -322,6 +322,9 @@ clEnqueueMapBuffer(cl_command_queue comm } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_map) { + cl_event_wait_for_events_list(1, &e); + } } ptr = data->ptr; @@ -469,7 +472,7 @@ clEnqueueUnmapMemObject(cl_command_queue data->mem_obj = memobj; data->ptr = mapped_ptr; - if (e_status == CL_COMPLETE) { // No need to wait + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // No need to wait err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { break; @@ -571,7 +574,7 @@ clEnqueueReadBuffer(cl_command_queue com data->offset = offset; data->size = size; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -583,6 +586,9 @@ clEnqueueReadBuffer(cl_command_queue com break; } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_read) { + cl_event_wait_for_events_list(1, &e); + } } } while (0); @@ -674,7 +680,7 @@ clEnqueueWriteBuffer(cl_command_queue co data->offset = offset; data->size = size; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -686,6 +692,9 @@ clEnqueueWriteBuffer(cl_command_queue co break; } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_write) { + cl_event_wait_for_events_list(1, &e); + } } } while (0); @@ -823,7 +832,7 @@ clEnqueueReadBufferRect(cl_command_queue data->host_row_pitch = host_row_pitch; data->host_slice_pitch = host_slice_pitch; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -835,6 +844,9 @@ clEnqueueReadBufferRect(cl_command_queue break; } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_read) { + cl_event_wait_for_events_list(1, &e); + } } } while (0); @@ -974,7 +986,7 @@ clEnqueueWriteBufferRect(cl_command_queu data->host_row_pitch = host_row_pitch; data->host_slice_pitch = host_slice_pitch; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -986,6 +998,9 @@ clEnqueueWriteBufferRect(cl_command_queu break; } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_write) { + cl_event_wait_for_events_list(1, &e); + } } } while (0); @@ -1093,7 +1108,7 @@ clEnqueueCopyBuffer(cl_command_queue com break; } - err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); if (err != CL_SUCCESS) { break; } @@ -1283,7 +1298,7 @@ clEnqueueCopyBufferRect(cl_command_queue if (e_status < CL_COMPLETE) { // Error happend, cancel. err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; break; - } else if (e_status == CL_COMPLETE) { + } else if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { err = cl_event_exec(e, CL_SUBMITTED, CL_FALSE); if (err != CL_SUCCESS) { break; @@ -1384,7 +1399,7 @@ clEnqueueFillBuffer(cl_command_queue com break; } - err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); if (err != CL_SUCCESS) { break; } @@ -1471,7 +1486,7 @@ clEnqueueMigrateMemObjects(cl_command_qu break; } - err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); if (err != CL_SUCCESS) { break; } @@ -1764,7 +1779,7 @@ clEnqueueMapImage(cl_command_queue comma if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) data->write_map = 1; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -1777,6 +1792,9 @@ clEnqueueMapImage(cl_command_queue comma } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_map) { + cl_event_wait_for_events_list(1, &e); + } } ptr = data->ptr; @@ -2014,7 +2032,7 @@ clEnqueueReadImage(cl_command_queue comm data->row_pitch = row_pitch; data->slice_pitch = slice_pitch; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -2026,6 +2044,9 @@ clEnqueueReadImage(cl_command_queue comm break; } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_read) { + cl_event_wait_for_events_list(1, &e); + } } } while (0); @@ -2218,7 +2239,7 @@ clEnqueueWriteImage(cl_command_queue com data->row_pitch = row_pitch; data->slice_pitch = slice_pitch; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -2230,6 +2251,9 @@ clEnqueueWriteImage(cl_command_queue com break; } cl_command_queue_enqueue_event(command_queue, e); + if (blocking_write) { + cl_event_wait_for_events_list(1, &e); + } } } while (0); @@ -2364,7 +2388,7 @@ clEnqueueCopyImage(cl_command_queue comm break; } - err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); if (err != CL_SUCCESS) { break; } @@ -2475,7 +2499,7 @@ clEnqueueCopyImageToBuffer(cl_command_qu break; } - err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); if (err != CL_SUCCESS) { break; } @@ -2587,7 +2611,7 @@ clEnqueueCopyBufferToImage(cl_command_qu break; } - err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); if (err != CL_SUCCESS) { break; } @@ -2697,7 +2721,7 @@ clEnqueueFillImage(cl_command_queue comm break; } - err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); if (err != CL_SUCCESS) { break; } --- a/src/cl_command_queue.h +++ b/src/cl_command_queue.h @@ -103,6 +103,11 @@ extern cl_int cl_command_queue_wait_fini extern cl_int cl_command_queue_wait_flush(cl_command_queue queue); /* Note: Must call this function with queue's lock. */ extern cl_event *cl_command_queue_record_in_queue_events(cl_command_queue queue, cl_uint *list_num); +/* Whether it is valid to call cl_event_exec directly, instead of cl_command_queue_enqueue_event */ +static inline cl_bool cl_command_queue_allow_bypass_submit(cl_command_queue queue){ + return (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)/* if out-of-order, always */ + || list_empty(&queue->worker.enqueued_events);/* if in-order, only if empty */ +} #endif /* __CL_COMMAND_QUEUE_H__ */ --- a/src/cl_command_queue_enqueue.c +++ b/src/cl_command_queue_enqueue.c @@ -65,6 +65,8 @@ worker_thread_function(void *Arg) if (cl_event_is_ready(e) <= CL_COMPLETE) { list_node_del(&e->enqueue_node); list_add_tail(&ready_list, &e->enqueue_node); + } else if(!(queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)){ + break; /* in in-order mode, can't skip over non-ready events */ } } @@ -80,18 +82,20 @@ worker_thread_function(void *Arg) CL_OBJECT_UNLOCK(queue); /* Do the really job without lock.*/ - exec_status = CL_SUBMITTED; - list_for_each_safe(pos, n, &ready_list) - { - e = list_entry(pos, _cl_event, enqueue_node); - cl_event_exec(e, exec_status, CL_FALSE); - } + if (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { /* in in-order mode, need to get each all the way to CL_COMPLETE before starting the next one */ + exec_status = CL_SUBMITTED; + list_for_each_safe(pos, n, &ready_list) + { + e = list_entry(pos, _cl_event, enqueue_node); + cl_event_exec(e, exec_status, CL_FALSE); + } - /* Notify all waiting for flush. */ - CL_OBJECT_LOCK(queue); - worker->in_exec_status = CL_SUBMITTED; - CL_OBJECT_NOTIFY_COND(queue); - CL_OBJECT_UNLOCK(queue); + /* Notify all waiting for flush. */ + CL_OBJECT_LOCK(queue); + worker->in_exec_status = CL_SUBMITTED; + CL_OBJECT_NOTIFY_COND(queue); + CL_OBJECT_UNLOCK(queue); + } list_for_each_safe(pos, n, &ready_list) { --- a/src/cl_gl_api.c +++ b/src/cl_gl_api.c @@ -188,7 +188,7 @@ cl_int clEnqueueAcquireGLObjects (cl_com data = &e->exec_data; data->type = EnqueueReturnSuccesss; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { @@ -274,7 +274,7 @@ cl_int clEnqueueReleaseGLObjects (cl_com data = &e->exec_data; data->type = EnqueueReturnSuccesss; - if (e_status == CL_COMPLETE) { + if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // Sync mode, no need to queue event. err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); if (err != CL_SUCCESS) { _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet