[Beignet] [PATCH] refine the event implementation and fix some bugs.
From: Luo 1 remove useless data element; 2 fix some logic bugs; 3 add implementation for clEnqueueMarkerWithWaitList. --- src/cl_alloc.c | 1 + src/cl_api.c | 22 +--- src/cl_event.c | 65 -- src/cl_event.h | 4 +++- 4 files changed, 64 insertions(+), 28 deletions(-) diff --git a/src/cl_alloc.c b/src/cl_alloc.c index 20d5578..93d2e6a 100644 --- a/src/cl_alloc.c +++ b/src/cl_alloc.c @@ -71,6 +71,7 @@ cl_free(void *ptr) return; atomic_dec(&cl_alloc_n); free(ptr); + ptr = NULL; } LOCAL size_t diff --git a/src/cl_api.c b/src/cl_api.c index 1543ff4..a8c4fbe 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2621,10 +2621,26 @@ clEnqueueNativeKernel(cl_command_queue command_queue, error: return err; } +clEnqueueMarker(cl_command_queue command_queue, +cl_event *event) +{ + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + if(event == NULL) { +err = CL_INVALID_VALUE; +goto error; + } + + cl_event_marker_with_wait_list(command_queue, 0, NULL, event); +error: + return err; +} cl_int -clEnqueueMarker(cl_command_queue command_queue, -cl_event * event) +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); @@ -2633,7 +2649,7 @@ clEnqueueMarker(cl_command_queue command_queue, goto error; } - cl_event_marker(command_queue, event); + cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } diff --git a/src/cl_event.c b/src/cl_event.c index 727ee1f..203bfc2 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -224,7 +224,7 @@ cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *eve if((event_wait_list[i]->type == CL_COMMAND_USER) || (event_wait_list[i]->enqueue_cb && (event_wait_list[i]->enqueue_cb->wait_user_events != NULL))){ - for(j=0; jgpgpu_event) cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1); 
-cl_event_set_status(event_wait_list[i], CL_COMPLETE); //Execute user's callback + cl_event_set_status(event_wait_list[i], CL_COMPLETE); //Execute user's callback } return CL_ENQUEUE_EXECUTE_IMM; } @@ -260,12 +260,14 @@ void cl_event_new_enqueue_callback(cl_event event, cl_int i; GET_QUEUE_THREAD_GPGPU(data->queue); - /* Allocate and inialize the structure itself */ + /* Allocate and initialize the structure itself */ TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); +#if 0 cb->num_events = num_events_in_wait_list; TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list)); for(i=0; iwait_list[i] = event_wait_list[i]; +#endif cb->event = event; cb->next = NULL; cb->wait_user_events = NULL; @@ -276,12 +278,13 @@ void cl_event_new_enqueue_callback(cl_event event, node = queue->wait_events[i]->waits_head; if(node == NULL) queue->wait_events[i]->waits_head = cb; - else -while((node != cb) && node->next) - node = node->next; -if(node == cb) //wait on dup user event - continue; -node->next = cb; + else{ + while((node != cb) && node->next) + node = node->next; + if(node == cb) //wait on dup user event + continue; + node->next = cb; + } /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); @@ -291,7 +294,7 @@ void cl_event_new_enqueue_callback(cl_event event, } } - /* Find out all user events that events in event_wait_list wait */ + /* Find out all user events that in event_wait_list wait */ for(i=0; istatus <= CL_COMPLETE) continue; @@ -319,21 +322,25 @@ void cl_event_new_enqueue_callback(cl_event event, while(user_events != NULL) { /* Insert the enqueue_callback to user event's waits_tail */ node = user_events->event->waits_head; -while((node != cb) && node->next) - node = node->next; -if(node == cb) { //wait on dup user event - user_events = user_events->next; - continue; +if(node == NULL) +event_wait_list[i]->waits_head = cb; +else{ +while((node != cb) && node->next) +node = node->next; 
+if(node == cb) { //wait on dup user event +user_events = user_events->next; +continue; +} +node->next = cb; } -node->next = cb; /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); u_ev->event = user_events->event; u_ev->next = cb->wait_user_events;
[Beignet] [PATCH] add OpenCL 1.2 API clEnqueueMarkerWithWaitList.
From: Luo --- src/cl_api.c | 23 --- src/cl_event.c | 10 +- src/cl_event.h | 2 +- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 1543ff4..0f62cb5 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2621,10 +2621,26 @@ clEnqueueNativeKernel(cl_command_queue command_queue, error: return err; } +clEnqueueMarker(cl_command_queue command_queue, +cl_event *event) +{ + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + if(event == NULL) { +err = CL_INVALID_VALUE; +goto error; + } + + cl_event_marker_with_wait_list(command_queue, 0, NULL, event); +error: + return err; +} cl_int -clEnqueueMarker(cl_command_queue command_queue, -cl_event * event) +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); @@ -2632,8 +2648,9 @@ clEnqueueMarker(cl_command_queue command_queue, err = CL_INVALID_VALUE; goto error; } + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); - cl_event_marker(command_queue, event); + cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } diff --git a/src/cl_event.c b/src/cl_event.c index 727ee1f..9d4e491 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -474,7 +474,10 @@ void cl_event_update_status(cl_event event) cl_event_set_status(event, CL_COMPLETE); } -cl_int cl_event_marker(cl_command_queue queue, cl_event* event) +cl_int cl_event_marker_with_wait_list(cl_command_queue queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event* event) { enqueue_data data; @@ -482,6 +485,11 @@ cl_int cl_event_marker(cl_command_queue queue, cl_event* event) if(event == NULL) return CL_OUT_OF_HOST_MEMORY; + //insert the input events to queue + for(i=0; i0, the marker event need wait queue->wait_events if(queue->wait_events_num > 0) { 
data.type = EnqueueMarker; diff --git a/src/cl_event.h b/src/cl_event.h index 3c61110..5a78a8d 100644 --- a/src/cl_event.h +++ b/src/cl_event.h @@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int); /* Check and update event status */ void cl_event_update_status(cl_event); /* Create the marker event */ -cl_int cl_event_marker(cl_command_queue, cl_event*); +cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*); /* Do the event profiling */ cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name); #endif /* __CL_EVENT_H__ */ -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 2/2] add [opencl 1.2] API clEnqueueMarkerWithWaitList.
From: Luo --- src/cl_api.c | 23 --- src/cl_event.c | 11 ++- src/cl_event.h | 2 +- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 1543ff4..0f62cb5 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2621,10 +2621,26 @@ clEnqueueNativeKernel(cl_command_queue command_queue, error: return err; } +clEnqueueMarker(cl_command_queue command_queue, +cl_event *event) +{ + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + if(event == NULL) { +err = CL_INVALID_VALUE; +goto error; + } + + cl_event_marker_with_wait_list(command_queue, 0, NULL, event); +error: + return err; +} cl_int -clEnqueueMarker(cl_command_queue command_queue, -cl_event * event) +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); @@ -2632,8 +2648,9 @@ clEnqueueMarker(cl_command_queue command_queue, err = CL_INVALID_VALUE; goto error; } + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); - cl_event_marker(command_queue, event); + cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } diff --git a/src/cl_event.c b/src/cl_event.c index 50ca134..1ad12a7 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -480,14 +480,23 @@ void cl_event_update_status(cl_event event) cl_event_set_status(event, CL_COMPLETE); } -cl_int cl_event_marker(cl_command_queue queue, cl_event* event) +cl_int cl_event_marker_with_wait_list(cl_command_queue queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event* event) { enqueue_data data; + cl_uint i = 0; *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE); if(event == NULL) return CL_OUT_OF_HOST_MEMORY; + //insert the input events to queue + for(i=0; i0, the marker event need wait queue->wait_events if(queue->wait_events_num > 0) { 
data.type = EnqueueMarker; diff --git a/src/cl_event.h b/src/cl_event.h index 3c61110..5a78a8d 100644 --- a/src/cl_event.h +++ b/src/cl_event.h @@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int); /* Check and update event status */ void cl_event_update_status(cl_event); /* Create the marker event */ -cl_int cl_event_marker(cl_command_queue, cl_event*); +cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*); /* Do the event profiling */ cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name); #endif /* __CL_EVENT_H__ */ -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 1/2] fix event related bugs.
From: Luo --- src/cl_alloc.c | 1 + src/cl_event.c | 76 +++--- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/cl_alloc.c b/src/cl_alloc.c index 20d5578..93d2e6a 100644 --- a/src/cl_alloc.c +++ b/src/cl_alloc.c @@ -71,6 +71,7 @@ cl_free(void *ptr) return; atomic_dec(&cl_alloc_n); free(ptr); + ptr = NULL; } LOCAL size_t diff --git a/src/cl_event.c b/src/cl_event.c index 727ee1f..50ca134 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -260,12 +260,12 @@ void cl_event_new_enqueue_callback(cl_event event, cl_int i; GET_QUEUE_THREAD_GPGPU(data->queue); - /* Allocate and inialize the structure itself */ + /* Allocate and initialize the structure itself */ TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); cb->num_events = num_events_in_wait_list; TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list)); for(i=0; iwait_list[i] = event_wait_list[i]; + cb->wait_list[i] = event_wait_list[i]; cb->event = event; cb->next = NULL; cb->wait_user_events = NULL; @@ -276,12 +276,13 @@ void cl_event_new_enqueue_callback(cl_event event, node = queue->wait_events[i]->waits_head; if(node == NULL) queue->wait_events[i]->waits_head = cb; - else -while((node != cb) && node->next) - node = node->next; -if(node == cb) //wait on dup user event - continue; -node->next = cb; + else{ + while((node != cb) && node->next) + node = node->next; + if(node == cb) //wait on dup user event + continue; + node->next = cb; + } /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); @@ -291,7 +292,7 @@ void cl_event_new_enqueue_callback(cl_event event, } } - /* Find out all user events that events in event_wait_list wait */ + /* Find out all user events that in event_wait_list wait */ for(i=0; istatus <= CL_COMPLETE) continue; @@ -319,21 +320,25 @@ void cl_event_new_enqueue_callback(cl_event event, while(user_events != NULL) { /* Insert the enqueue_callback to user event's waits_tail */ node = 
user_events->event->waits_head; -while((node != cb) && node->next) - node = node->next; -if(node == cb) { //wait on dup user event - user_events = user_events->next; - continue; +if(node == NULL) +event_wait_list[i]->waits_head = cb; +else{ +while((node != cb) && node->next) +node = node->next; +if(node == cb) { //wait on dup user event +user_events = user_events->next; +continue; +} +node->next = cb; } -node->next = cb; /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); u_ev->event = user_events->event; u_ev->next = cb->wait_user_events; cb->wait_user_events = u_ev; +cl_command_queue_insert_event(event->queue, user_events->event); user_events = user_events->next; -cl_command_queue_insert_event(event->queue, event_wait_list[i]); } } } @@ -353,8 +358,6 @@ error: cb->wait_user_events = cb->wait_user_events->next; cl_free(u_ev); } -if(cb->wait_list) - cl_free(cb->wait_list); cl_free(cb); } goto exit; @@ -363,7 +366,7 @@ error: void cl_event_set_status(cl_event event, cl_int status) { user_callback *user_cb; - user_event*u_ev, *u_ev_next; + user_event*u_ev; cl_int ret, i; cl_event evt; @@ -387,11 +390,11 @@ void cl_event_set_status(cl_event event, cl_int status) pthread_mutex_unlock(&event->ctx->event_lock); for(i=0; ienqueue_cb->num_events; i++) -cl_event_delete(event->enqueue_cb->wait_list[i]); + cl_event_delete(event->enqueue_cb->wait_list[i]); pthread_mutex_lock(&event->ctx->event_lock); if(event->enqueue_cb->wait_list) -cl_free(event->enqueue_cb->wait_list); + cl_free(event->enqueue_cb->wait_list); cl_free(event->enqueue_cb); event->enqueue_cb = NULL; } @@ -419,22 +422,25 @@ void cl_event_set_status(cl_event event, cl_int status) /* Check all defer enqueue */ enqueue_callback *cb, *enqueue_cb = event->waits_head; while(enqueue_cb) { -/* Remove this user event in enqueue_cb */ -while(enqueue_cb->wait_user_events && - enqueue_cb->wait_user_events->event == event) { - u_ev = enqueue_cb->wait_user_events; - 
enqueue_cb->wait_user_events = enqueue_cb->wait_user_events->next; - cl_free(u_ev); -} - +/* Remove this user event in enqueue_cb, update the header if needed. */ u_ev = enqueue_cb->wait_user_events; +user_event * u_prev = N
[Beignet] [PATCH 1/2] [opencl 1.2] fix event related bugs.
From: Luo --- src/cl_alloc.c | 1 + src/cl_event.c | 76 -- 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/src/cl_alloc.c b/src/cl_alloc.c index 20d5578..93d2e6a 100644 --- a/src/cl_alloc.c +++ b/src/cl_alloc.c @@ -71,6 +71,7 @@ cl_free(void *ptr) return; atomic_dec(&cl_alloc_n); free(ptr); + ptr = NULL; } LOCAL size_t diff --git a/src/cl_event.c b/src/cl_event.c index 727ee1f..620c116 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -260,12 +260,12 @@ void cl_event_new_enqueue_callback(cl_event event, cl_int i; GET_QUEUE_THREAD_GPGPU(data->queue); - /* Allocate and inialize the structure itself */ + /* Allocate and initialize the structure itself */ TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); cb->num_events = num_events_in_wait_list; TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list)); for(i=0; iwait_list[i] = event_wait_list[i]; + cb->wait_list[i] = event_wait_list[i]; cb->event = event; cb->next = NULL; cb->wait_user_events = NULL; @@ -276,12 +276,13 @@ void cl_event_new_enqueue_callback(cl_event event, node = queue->wait_events[i]->waits_head; if(node == NULL) queue->wait_events[i]->waits_head = cb; - else -while((node != cb) && node->next) - node = node->next; -if(node == cb) //wait on dup user event - continue; -node->next = cb; + else{ + while((node != cb) && node->next) + node = node->next; + if(node == cb) //wait on dup user event + continue; + node->next = cb; + } /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); @@ -291,7 +292,7 @@ void cl_event_new_enqueue_callback(cl_event event, } } - /* Find out all user events that events in event_wait_list wait */ + /* Find out all user events that in event_wait_list wait */ for(i=0; istatus <= CL_COMPLETE) continue; @@ -319,21 +320,25 @@ void cl_event_new_enqueue_callback(cl_event event, while(user_events != NULL) { /* Insert the enqueue_callback to user event's waits_tail */ node = 
user_events->event->waits_head; -while((node != cb) && node->next) - node = node->next; -if(node == cb) { //wait on dup user event - user_events = user_events->next; - continue; +if(node == NULL) +event_wait_list[i]->waits_head = cb; +else{ +while((node != cb) && node->next) +node = node->next; +if(node == cb) { //wait on dup user event +user_events = user_events->next; +continue; +} +node->next = cb; } -node->next = cb; /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); u_ev->event = user_events->event; u_ev->next = cb->wait_user_events; cb->wait_user_events = u_ev; +cl_command_queue_insert_event(event->queue, user_events->event); user_events = user_events->next; -cl_command_queue_insert_event(event->queue, event_wait_list[i]); } } } @@ -354,7 +359,7 @@ error: cl_free(u_ev); } if(cb->wait_list) - cl_free(cb->wait_list); +cl_free(cb->wait_list); cl_free(cb); } goto exit; @@ -363,7 +368,7 @@ error: void cl_event_set_status(cl_event event, cl_int status) { user_callback *user_cb; - user_event*u_ev, *u_ev_next; + user_event*u_ev; cl_int ret, i; cl_event evt; @@ -387,11 +392,11 @@ void cl_event_set_status(cl_event event, cl_int status) pthread_mutex_unlock(&event->ctx->event_lock); for(i=0; ienqueue_cb->num_events; i++) -cl_event_delete(event->enqueue_cb->wait_list[i]); + cl_event_delete(event->enqueue_cb->wait_list[i]); pthread_mutex_lock(&event->ctx->event_lock); if(event->enqueue_cb->wait_list) -cl_free(event->enqueue_cb->wait_list); + cl_free(event->enqueue_cb->wait_list); cl_free(event->enqueue_cb); event->enqueue_cb = NULL; } @@ -419,22 +424,25 @@ void cl_event_set_status(cl_event event, cl_int status) /* Check all defer enqueue */ enqueue_callback *cb, *enqueue_cb = event->waits_head; while(enqueue_cb) { -/* Remove this user event in enqueue_cb */ -while(enqueue_cb->wait_user_events && - enqueue_cb->wait_user_events->event == event) { - u_ev = enqueue_cb->wait_user_events; - 
enqueue_cb->wait_user_events = enqueue_cb->wait_user_events->next; - cl_free(u_ev); -} - +/* Remove this user event in enqueue_cb, update the header if needed. */ u_ev = enqueue_cb->wait_user_events; +user_event * u_prev = NULL; +user_event *tmp
[Beignet] [PATCH 2/2] add [opencl 1.2] API clEnqueueMarkerWithWaitList.
From: Luo --- src/cl_api.c | 25 ++--- src/cl_event.c | 11 ++- src/cl_event.h | 2 +- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 1543ff4..b5c42e7 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2623,8 +2623,8 @@ error: } cl_int -clEnqueueMarker(cl_command_queue command_queue, -cl_event * event) +clEnqueueMarker(cl_command_queue command_queue, +cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); @@ -2633,7 +2633,26 @@ clEnqueueMarker(cl_command_queue command_queue, goto error; } - cl_event_marker(command_queue, event); + cl_event_marker_with_wait_list(command_queue, 0, NULL, event); +error: + return err; +} + +cl_int +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event *event) +{ + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + if(event == NULL) { +err = CL_INVALID_VALUE; +goto error; + } + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); + + cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } diff --git a/src/cl_event.c b/src/cl_event.c index 620c116..07260ac 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -482,14 +482,23 @@ void cl_event_update_status(cl_event event) cl_event_set_status(event, CL_COMPLETE); } -cl_int cl_event_marker(cl_command_queue queue, cl_event* event) +cl_int cl_event_marker_with_wait_list(cl_command_queue queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event* event) { enqueue_data data; + cl_uint i = 0; *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE); if(event == NULL) return CL_OUT_OF_HOST_MEMORY; + //insert the input events to queue + for(i=0; i0, the marker event need wait queue->wait_events if(queue->wait_events_num > 0) { data.type = EnqueueMarker; diff --git a/src/cl_event.h b/src/cl_event.h index 
3c61110..5a78a8d 100644 --- a/src/cl_event.h +++ b/src/cl_event.h @@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int); /* Check and update event status */ void cl_event_update_status(cl_event); /* Create the marker event */ -cl_int cl_event_marker(cl_command_queue, cl_event*); +cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*); /* Do the event profiling */ cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name); #endif /* __CL_EVENT_H__ */ -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 2/2] add [opencl 1.2] API clEnqueueMarkerWithWaitList.
From: Luo --- src/cl_api.c | 25 ++--- src/cl_event.c | 11 ++- src/cl_event.h | 2 +- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 1543ff4..b5c42e7 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2623,8 +2623,8 @@ error: } cl_int -clEnqueueMarker(cl_command_queue command_queue, -cl_event * event) +clEnqueueMarker(cl_command_queue command_queue, +cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); @@ -2633,7 +2633,26 @@ clEnqueueMarker(cl_command_queue command_queue, goto error; } - cl_event_marker(command_queue, event); + cl_event_marker_with_wait_list(command_queue, 0, NULL, event); +error: + return err; +} + +cl_int +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event *event) +{ + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + if(event == NULL) { +err = CL_INVALID_VALUE; +goto error; + } + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); + + cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } diff --git a/src/cl_event.c b/src/cl_event.c index 85e4041..46006ce 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -486,14 +486,23 @@ void cl_event_update_status(cl_event event) cl_event_set_status(event, CL_COMPLETE); } -cl_int cl_event_marker(cl_command_queue queue, cl_event* event) +cl_int cl_event_marker_with_wait_list(cl_command_queue queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event* event) { enqueue_data data; + cl_uint i = 0; *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE); if(event == NULL) return CL_OUT_OF_HOST_MEMORY; + //insert the input events to queue + for(i=0; i0, the marker event need wait queue->wait_events if(queue->wait_events_num > 0) { data.type = EnqueueMarker; diff --git a/src/cl_event.h b/src/cl_event.h index 
3c61110..5a78a8d 100644 --- a/src/cl_event.h +++ b/src/cl_event.h @@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int); /* Check and update event status */ void cl_event_update_status(cl_event); /* Create the marker event */ -cl_int cl_event_marker(cl_command_queue, cl_event*); +cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*); /* Do the event profiling */ cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name); #endif /* __CL_EVENT_H__ */ -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 1/2] [opencl 1.2] fix event related bugs.
From: Luo --- src/cl_alloc.c | 1 + src/cl_event.c | 80 +- 2 files changed, 47 insertions(+), 34 deletions(-) diff --git a/src/cl_alloc.c b/src/cl_alloc.c index 20d5578..93d2e6a 100644 --- a/src/cl_alloc.c +++ b/src/cl_alloc.c @@ -71,6 +71,7 @@ cl_free(void *ptr) return; atomic_dec(&cl_alloc_n); free(ptr); + ptr = NULL; } LOCAL size_t diff --git a/src/cl_event.c b/src/cl_event.c index 727ee1f..85e4041 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -260,12 +260,12 @@ void cl_event_new_enqueue_callback(cl_event event, cl_int i; GET_QUEUE_THREAD_GPGPU(data->queue); - /* Allocate and inialize the structure itself */ + /* Allocate and initialize the structure itself */ TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); cb->num_events = num_events_in_wait_list; TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list)); for(i=0; iwait_list[i] = event_wait_list[i]; + cb->wait_list[i] = event_wait_list[i]; cb->event = event; cb->next = NULL; cb->wait_user_events = NULL; @@ -276,12 +276,13 @@ void cl_event_new_enqueue_callback(cl_event event, node = queue->wait_events[i]->waits_head; if(node == NULL) queue->wait_events[i]->waits_head = cb; - else -while((node != cb) && node->next) - node = node->next; -if(node == cb) //wait on dup user event - continue; -node->next = cb; + else{ + while((node != cb) && node->next) + node = node->next; + if(node == cb) //wait on dup user event + continue; + node->next = cb; + } /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); @@ -291,7 +292,7 @@ void cl_event_new_enqueue_callback(cl_event event, } } - /* Find out all user events that events in event_wait_list wait */ + /* Find out all user events that in event_wait_list wait */ for(i=0; istatus <= CL_COMPLETE) continue; @@ -319,21 +320,25 @@ void cl_event_new_enqueue_callback(cl_event event, while(user_events != NULL) { /* Insert the enqueue_callback to user event's waits_tail */ node = 
user_events->event->waits_head; -while((node != cb) && node->next) - node = node->next; -if(node == cb) { //wait on dup user event - user_events = user_events->next; - continue; +if(node == NULL) +event_wait_list[i]->waits_head = cb; +else{ +while((node != cb) && node->next) +node = node->next; +if(node == cb) { //wait on dup user event +user_events = user_events->next; +continue; +} +node->next = cb; } -node->next = cb; /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); u_ev->event = user_events->event; u_ev->next = cb->wait_user_events; cb->wait_user_events = u_ev; +cl_command_queue_insert_event(event->queue, user_events->event); user_events = user_events->next; -cl_command_queue_insert_event(event->queue, event_wait_list[i]); } } } @@ -354,7 +359,7 @@ error: cl_free(u_ev); } if(cb->wait_list) - cl_free(cb->wait_list); +cl_free(cb->wait_list); cl_free(cb); } goto exit; @@ -363,7 +368,7 @@ error: void cl_event_set_status(cl_event event, cl_int status) { user_callback *user_cb; - user_event*u_ev, *u_ev_next; + user_event*u_ev; cl_int ret, i; cl_event evt; @@ -387,11 +392,11 @@ void cl_event_set_status(cl_event event, cl_int status) pthread_mutex_unlock(&event->ctx->event_lock); for(i=0; ienqueue_cb->num_events; i++) -cl_event_delete(event->enqueue_cb->wait_list[i]); + cl_event_delete(event->enqueue_cb->wait_list[i]); pthread_mutex_lock(&event->ctx->event_lock); if(event->enqueue_cb->wait_list) -cl_free(event->enqueue_cb->wait_list); + cl_free(event->enqueue_cb->wait_list); cl_free(event->enqueue_cb); event->enqueue_cb = NULL; } @@ -419,22 +424,29 @@ void cl_event_set_status(cl_event event, cl_int status) /* Check all defer enqueue */ enqueue_callback *cb, *enqueue_cb = event->waits_head; while(enqueue_cb) { -/* Remove this user event in enqueue_cb */ -while(enqueue_cb->wait_user_events && - enqueue_cb->wait_user_events->event == event) { - u_ev = enqueue_cb->wait_user_events; - 
enqueue_cb->wait_user_events = enqueue_cb->wait_user_events->next; - cl_free(u_ev); -} - +/* Remove this user event in enqueue_cb, update the header if needed. */ u_ev = enqueue_cb->wait_user_events; +user_event * u_prev = NULL; +user_event *tmp
[Beignet] [Patch V2 1/2] [opencl 1.2] fix event related bugs.
From: Luo --- src/cl_alloc.c | 1 + src/cl_event.c | 64 ++ 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/cl_alloc.c b/src/cl_alloc.c index 20d5578..93d2e6a 100644 --- a/src/cl_alloc.c +++ b/src/cl_alloc.c @@ -71,6 +71,7 @@ cl_free(void *ptr) return; atomic_dec(&cl_alloc_n); free(ptr); + ptr = NULL; } LOCAL size_t diff --git a/src/cl_event.c b/src/cl_event.c index 727ee1f..9d21984 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -260,7 +260,7 @@ void cl_event_new_enqueue_callback(cl_event event, cl_int i; GET_QUEUE_THREAD_GPGPU(data->queue); - /* Allocate and inialize the structure itself */ + /* Allocate and initialize the structure itself */ TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); cb->num_events = num_events_in_wait_list; TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list)); @@ -276,12 +276,13 @@ void cl_event_new_enqueue_callback(cl_event event, node = queue->wait_events[i]->waits_head; if(node == NULL) queue->wait_events[i]->waits_head = cb; - else + else{ while((node != cb) && node->next) node = node->next; if(node == cb) //wait on dup user event continue; node->next = cb; + } /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); @@ -291,7 +292,7 @@ void cl_event_new_enqueue_callback(cl_event event, } } - /* Find out all user events that events in event_wait_list wait */ + /* Find out all user events that in event_wait_list wait */ for(i=0; istatus <= CL_COMPLETE) continue; @@ -319,21 +320,25 @@ void cl_event_new_enqueue_callback(cl_event event, while(user_events != NULL) { /* Insert the enqueue_callback to user event's waits_tail */ node = user_events->event->waits_head; -while((node != cb) && node->next) - node = node->next; -if(node == cb) { //wait on dup user event - user_events = user_events->next; - continue; +if(node == NULL) + event_wait_list[i]->waits_head = cb; +else{ + while((node != cb) && node->next) +node = node->next; + if(node 
== cb) { //wait on dup user event +user_events = user_events->next; +continue; + } + node->next = cb; } -node->next = cb; /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); u_ev->event = user_events->event; u_ev->next = cb->wait_user_events; cb->wait_user_events = u_ev; +cl_command_queue_insert_event(event->queue, user_events->event); user_events = user_events->next; -cl_command_queue_insert_event(event->queue, event_wait_list[i]); } } } @@ -363,7 +368,7 @@ error: void cl_event_set_status(cl_event event, cl_int status) { user_callback *user_cb; - user_event*u_ev, *u_ev_next; + user_event*u_ev; cl_int ret, i; cl_event evt; @@ -419,22 +424,29 @@ void cl_event_set_status(cl_event event, cl_int status) /* Check all defer enqueue */ enqueue_callback *cb, *enqueue_cb = event->waits_head; while(enqueue_cb) { -/* Remove this user event in enqueue_cb */ -while(enqueue_cb->wait_user_events && - enqueue_cb->wait_user_events->event == event) { - u_ev = enqueue_cb->wait_user_events; - enqueue_cb->wait_user_events = enqueue_cb->wait_user_events->next; - cl_free(u_ev); -} - +/* Remove this user event in enqueue_cb, update the header if needed. 
*/ u_ev = enqueue_cb->wait_user_events; +user_event * u_prev = NULL; +user_event *tmp =NULL; while(u_ev) { - u_ev_next = u_ev->next; - if(u_ev_next && u_ev_next->event == event) { -u_ev->next = u_ev_next->next; -cl_free(u_ev_next); - } else -u_ev->next = u_ev_next; + if(u_ev && u_ev->event == event) { +if(u_prev){ + u_prev->next = u_ev->next; +} +tmp = u_ev; +u_ev = u_ev->next; +cl_free(tmp); + }else{ +if(!u_prev){ + enqueue_cb->wait_user_events = u_ev; +} +u_prev=u_ev; +u_ev = u_ev->next; + } +} + +if(!u_prev){ + enqueue_cb->wait_user_events = NULL; } /* Still wait on other user events */ @@ -448,7 +460,7 @@ void cl_event_set_status(cl_event event, cl_int status) /* All user events complete, now wait enqueue events */ ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list, - enqueue_cb->event->queue); +enqueue_cb->event->queue); ret = ret; assert(ret != CL_ENQUEUE_EXECUTE_DEFER); -- 1.8.1.2 ___
[Beignet] [Patch V2 2/2] add [opencl 1.2] API clEnqueueMarkerWithWaitList.
From: Luo --- src/cl_api.c | 25 ++--- src/cl_event.c | 20 +++- src/cl_event.h | 2 +- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 1543ff4..de67f01 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2623,8 +2623,8 @@ error: } cl_int -clEnqueueMarker(cl_command_queue command_queue, -cl_event * event) +clEnqueueMarker(cl_command_queue command_queue, +cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); @@ -2633,7 +2633,26 @@ clEnqueueMarker(cl_command_queue command_queue, goto error; } - cl_event_marker(command_queue, event); + cl_event_marker_with_wait_list(command_queue, 0, NULL, event); +error: + return err; +} + +cl_int +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event *event) +{ + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + if(event == NULL) { +err = CL_INVALID_VALUE; +goto error; + } + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); + + cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } diff --git a/src/cl_event.c b/src/cl_event.c index 9d21984..54520d2 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -486,14 +486,32 @@ void cl_event_update_status(cl_event event) cl_event_set_status(event, CL_COMPLETE); } -cl_int cl_event_marker(cl_command_queue queue, cl_event* event) +cl_int cl_event_marker_with_wait_list(cl_command_queue queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event* event) { enqueue_data data; + cl_uint i = 0; *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE); if(event == NULL) return CL_OUT_OF_HOST_MEMORY; + //insert the input events to queue + for(i=0; itype==CL_COMMAND_USER) { + cl_command_queue_insert_event(queue, event_wait_list[i]); +}else if(event_wait_list[i]->enqueue_cb != NULL) { + user_event* user_events 
= event_wait_list[i]->enqueue_cb->wait_user_events; + + while(user_events != NULL) { +cl_command_queue_insert_event(queue, user_events->event); +user_events = user_events->next; + } +} + } + //if wait_events_num>0, the marker event need wait queue->wait_events if(queue->wait_events_num > 0) { data.type = EnqueueMarker; diff --git a/src/cl_event.h b/src/cl_event.h index 3c61110..5a78a8d 100644 --- a/src/cl_event.h +++ b/src/cl_event.h @@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int); /* Check and update event status */ void cl_event_update_status(cl_event); /* Create the marker event */ -cl_int cl_event_marker(cl_command_queue, cl_event*); +cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*); /* Do the event profiling */ cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name); #endif /* __CL_EVENT_H__ */ -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH V3 2/2] add [opencl 1.2] API clEnqueueMarkerWithWaitList.
From: Luo --- src/cl_api.c | 25 ++--- src/cl_event.c | 15 --- src/cl_event.h | 2 +- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 1543ff4..de67f01 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2623,8 +2623,8 @@ error: } cl_int -clEnqueueMarker(cl_command_queue command_queue, -cl_event * event) +clEnqueueMarker(cl_command_queue command_queue, +cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); @@ -2633,7 +2633,26 @@ clEnqueueMarker(cl_command_queue command_queue, goto error; } - cl_event_marker(command_queue, event); + cl_event_marker_with_wait_list(command_queue, 0, NULL, event); +error: + return err; +} + +cl_int +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event *event) +{ + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + if(event == NULL) { +err = CL_INVALID_VALUE; +goto error; + } + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); + + cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } diff --git a/src/cl_event.c b/src/cl_event.c index 9d21984..aa065c2 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -486,16 +486,25 @@ void cl_event_update_status(cl_event event) cl_event_set_status(event, CL_COMPLETE); } -cl_int cl_event_marker(cl_command_queue queue, cl_event* event) +cl_int cl_event_marker_with_wait_list(cl_command_queue queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event* event) { enqueue_data data; + cl_uint i = 0; *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE); if(event == NULL) return CL_OUT_OF_HOST_MEMORY; - //if wait_events_num>0, the marker event need wait queue->wait_events - if(queue->wait_events_num > 0) { + //enqueues a marker command which waits for either a list of events to complete, or if the list is + 
//empty it waits for all commands previously enqueued in command_queue to complete before it completes. + if(num_events_in_wait_list > 0){ +data.type = EnqueueMarker; +cl_event_new_enqueue_callback(*event, &data, num_events_in_wait_list, event_wait_list); +return CL_SUCCESS; + } else if(queue->wait_events_num > 0) { data.type = EnqueueMarker; cl_event_new_enqueue_callback(*event, &data, queue->wait_events_num, queue->wait_events); return CL_SUCCESS; diff --git a/src/cl_event.h b/src/cl_event.h index 3c61110..5a78a8d 100644 --- a/src/cl_event.h +++ b/src/cl_event.h @@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int); /* Check and update event status */ void cl_event_update_status(cl_event); /* Create the marker event */ -cl_int cl_event_marker(cl_command_queue, cl_event*); +cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*); /* Do the event profiling */ cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name); #endif /* __CL_EVENT_H__ */ -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH v4 2/2] add [opencl-1.2] API clEnqueueMarkerWithWaitList.
From: Luo --- src/cl_api.c | 25 ++--- src/cl_event.c | 14 +++--- src/cl_event.h | 2 +- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 1543ff4..de67f01 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2623,8 +2623,8 @@ error: } cl_int -clEnqueueMarker(cl_command_queue command_queue, -cl_event * event) +clEnqueueMarker(cl_command_queue command_queue, +cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); @@ -2633,7 +2633,26 @@ clEnqueueMarker(cl_command_queue command_queue, goto error; } - cl_event_marker(command_queue, event); + cl_event_marker_with_wait_list(command_queue, 0, NULL, event); +error: + return err; +} + +cl_int +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event *event) +{ + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + if(event == NULL) { +err = CL_INVALID_VALUE; +goto error; + } + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); + + cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } diff --git a/src/cl_event.c b/src/cl_event.c index 9d21984..49dd423 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -486,7 +486,10 @@ void cl_event_update_status(cl_event event) cl_event_set_status(event, CL_COMPLETE); } -cl_int cl_event_marker(cl_command_queue queue, cl_event* event) +cl_int cl_event_marker_with_wait_list(cl_command_queue queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event* event) { enqueue_data data; @@ -494,8 +497,13 @@ cl_int cl_event_marker(cl_command_queue queue, cl_event* event) if(event == NULL) return CL_OUT_OF_HOST_MEMORY; - //if wait_events_num>0, the marker event need wait queue->wait_events - if(queue->wait_events_num > 0) { + //enqueues a marker command which waits for either a list of events to complete, or if the list is + 
//empty it waits for all commands previously enqueued in command_queue to complete before it completes. + if(num_events_in_wait_list > 0){ +data.type = EnqueueMarker; +cl_event_new_enqueue_callback(*event, &data, num_events_in_wait_list, event_wait_list); +return CL_SUCCESS; + } else if(queue->wait_events_num > 0) { data.type = EnqueueMarker; cl_event_new_enqueue_callback(*event, &data, queue->wait_events_num, queue->wait_events); return CL_SUCCESS; diff --git a/src/cl_event.h b/src/cl_event.h index 3c61110..5a78a8d 100644 --- a/src/cl_event.h +++ b/src/cl_event.h @@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int); /* Check and update event status */ void cl_event_update_status(cl_event); /* Create the marker event */ -cl_int cl_event_marker(cl_command_queue, cl_event*); +cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*); /* Do the event profiling */ cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name); #endif /* __CL_EVENT_H__ */ -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 1/2] add test case barrier_list and marker_list.
From: Luo Signed-off-by: Luo --- utests/CMakeLists.txt | 2 ++ utests/barrier_list.cpp | 75 + utests/marker_list.cpp | 75 + 3 files changed, 152 insertions(+) create mode 100644 utests/barrier_list.cpp create mode 100644 utests/marker_list.cpp diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 415dcb6..f9cc3f1 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -153,6 +153,8 @@ set (utests_sources runtime_createcontext.cpp runtime_null_kernel_arg.cpp runtime_event.cpp + barrier_list.cpp + marker_list.cpp compiler_double.cpp compiler_double_2.cpp compiler_double_3.cpp diff --git a/utests/barrier_list.cpp b/utests/barrier_list.cpp new file mode 100644 index 000..e672482 --- /dev/null +++ b/utests/barrier_list.cpp @@ -0,0 +1,75 @@ +#include "utest_helper.hpp" + +#define BUFFERSIZE 32*1024 +void barrier_list(void) +{ + const size_t n = BUFFERSIZE; + cl_int cpu_src[BUFFERSIZE]; + cl_int cpu_src_2[BUFFERSIZE]; + cl_event ev[5]; + cl_int status = 0; + cl_int value = 34; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_event"); + OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[1], 0, BUFFERSIZE*sizeof(int), NULL); + + for(cl_uint i=0; i= CL_SUBMITTED); + } + + + buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL); + + clEnqueueBarrierWithWaitList(queue, 0, NULL, &ev[3]); + + clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, BUFFERSIZE*sizeof(int), (void *)cpu_src_2, 0, NULL, &ev[4]); + + OCL_FINISH(); + clGetEventInfo(ev[4], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); + OCL_ASSERT(status != CL_COMPLETE); + + OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE); + + clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); + OCL_ASSERT(status == CL_COMPLETE); + + OCL_FINISH(); + + for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { +clGetEventInfo(ev[i], 
CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); +OCL_ASSERT(status <= CL_COMPLETE); + } + + for (uint32_t i = 0; i < n; ++i) { +OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3); + } + clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL); + + for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { +clReleaseEvent(ev[i]); + } +} + +MAKE_UTEST_FROM_FUNCTION(barrier_list); diff --git a/utests/marker_list.cpp b/utests/marker_list.cpp new file mode 100644 index 000..cb4e749 --- /dev/null +++ b/utests/marker_list.cpp @@ -0,0 +1,75 @@ +#include "utest_helper.hpp" + +#define BUFFERSIZE 32*1024 +void marker_list(void) +{ + const size_t n = BUFFERSIZE; + cl_int cpu_src[BUFFERSIZE]; + cl_int cpu_src_2[BUFFERSIZE]; + cl_event ev[5]; + cl_int status = 0; + cl_int value = 34; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_event"); + OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[1], 0, BUFFERSIZE*sizeof(int), NULL); + + for(cl_uint i=0; i= CL_SUBMITTED); + } + + + buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL); + + clEnqueueMarkerWithWaitList(queue, 0, NULL, &ev[3]); + + clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, BUFFERSIZE*sizeof(int), (void *)cpu_src_2, 0, NULL, &ev[4]); + + OCL_FINISH(); + clGetEventInfo(ev[4], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); + OCL_ASSERT(status == CL_COMPLETE); + + OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE); + + clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); + OCL_ASSERT(status == CL_COMPLETE); + + OCL_FINISH(); + + for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { +clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); +OCL_ASSERT(status <= CL_COMPLETE); + } + + for (uint32_t i = 0; i < n; ++i) { +OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3); + } + 
clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL); + + for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { +clReleaseEvent(ev[i]); + } +} + +MAKE_UTEST_FROM_FUNCTION(marker_list); -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 2/2] add [opencl-1.2] API clEnqueueBarrierWithWaitList.
From: Luo API clEnqueueMarkerWithWaitList patch didn't push the latest, update in this patch; the difference between clEnqueueMarkerWithWaitList and this API is that this API would block later enqueued calls, so added a barrier_events queue to manager barrier's wait list. Signed-off-by: Luo --- src/cl_api.c | 22 +++- src/cl_command_queue.c | 66 +-- src/cl_command_queue.h | 9 ++-- src/cl_enqueue.h | 1 + src/cl_event.c | 144 + src/cl_event.h | 6 +++ 6 files changed, 206 insertions(+), 42 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 9c22819..8902665 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2753,8 +2753,28 @@ clEnqueueBarrier(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); - cl_command_queue_set_barrier(command_queue); + cl_event_barrier_with_wait_list(command_queue, 0, NULL, NULL); + +error: + return err; +} + +cl_int +clEnqueueBarrierWithWaitList(cl_command_queue command_queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event *event) +{ + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + if(event == NULL) { +err = CL_INVALID_VALUE; +goto error; + } + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); + + cl_event_barrier_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 6a699c0..c754ad5 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -492,18 +492,74 @@ cl_command_queue_remove_event(cl_command_queue queue, cl_event event) if(i == queue->wait_events_num) return; - if(queue->barrier_index >= i) -queue->barrier_index -= 1; - for(; iwait_events_num-1; i++) { queue->wait_events[i] = queue->wait_events[i+1]; } queue->wait_events_num -= 1; } +#define DEFAULT_WAIT_EVENTS_SIZE 16 LOCAL void -cl_command_queue_set_barrier(cl_command_queue queue) +cl_command_queue_insert_barrier_event(cl_command_queue 
queue, cl_event event) { -queue->barrier_index = queue->wait_events_num; + cl_int i=0; + cl_event *new_list; + + assert(queue != NULL); + if(queue->barrier_events == NULL) { +queue->barrier_events_size = DEFAULT_WAIT_EVENTS_SIZE; +TRY_ALLOC_NO_ERR (queue->barrier_events, CALLOC_ARRAY(cl_event, queue->barrier_events_size)); + } + + for(i=0; ibarrier_events_num; i++) { +if(queue->barrier_events[i] == event) + return; //is in the barrier_events, need to insert + } + + if(queue->barrier_events_num < queue->barrier_events_size) { +queue->barrier_events[queue->barrier_events_num++] = event; +return; + } + + //barrier_events_num == barrier_events_size, array is full + queue->barrier_events_size *= 2; + TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, queue->barrier_events_size)); + memcpy(new_list, queue->barrier_events, sizeof(cl_event)*queue->barrier_events_num); + cl_free(queue->barrier_events); + queue->barrier_events = new_list; + queue->barrier_events[queue->barrier_events_num++] = event; + return; + +exit: + return; +error: + if(queue->barrier_events) +cl_free(queue->barrier_events); + queue->barrier_events = NULL; + queue->barrier_events_size = 0; + queue->barrier_events_num = 0; + goto exit; + } +LOCAL void +cl_command_queue_remove_barrier_event(cl_command_queue queue, cl_event event) +{ + cl_int i=0; + + if(queue->barrier_events_num == 0) +return; + + for(i=0; ibarrier_events_num; i++) { +if(queue->barrier_events[i] == event) + break; + } + + if(i == queue->barrier_events_num) +return; + + for(; ibarrier_events_num-1; i++) { +queue->barrier_events[i] = queue->barrier_events[i+1]; + } + queue->barrier_events_num -= 1; +} diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h index 40c272c..b79d63a 100644 --- a/src/cl_command_queue.h +++ b/src/cl_command_queue.h @@ -34,10 +34,12 @@ struct _cl_command_queue { uint64_t magic; /* To identify it as a command queue */ volatile int ref_n; /* We reference count this object */ cl_context ctx; /* Its parent context 
*/ + cl_event* barrier_events; /* Point to array of non-complete user events that block this command queue */ + cl_intbarrier_events_num; /* Number of Non-complete user events */ + cl_intbarrier_events_size; /* The size of array that wait_events point to */ cl_event* wait_events; /* Point to array of non-complete user events that block this command queue */ cl_intwait_events_num; /* Number of Non-complete user events */ cl_intwait_events_size; /* The size of array that wait_events point to */ - cl_intbarrier_index;
[Beignet] [PATCH V1 2/2] add test case runtime_barrier_list and runtime_marker_list.
From: Luo --- utests/CMakeLists.txt | 2 ++ utests/runtime_barrier_list.cpp | 75 + utests/runtime_marker_list.cpp | 75 + 3 files changed, 152 insertions(+) create mode 100644 utests/runtime_barrier_list.cpp create mode 100644 utests/runtime_marker_list.cpp diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 415dcb6..fcce083 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -153,6 +153,8 @@ set (utests_sources runtime_createcontext.cpp runtime_null_kernel_arg.cpp runtime_event.cpp + runtime_barrier_list.cpp + runtime_marker_list.cpp compiler_double.cpp compiler_double_2.cpp compiler_double_3.cpp diff --git a/utests/runtime_barrier_list.cpp b/utests/runtime_barrier_list.cpp new file mode 100644 index 000..6987d5e --- /dev/null +++ b/utests/runtime_barrier_list.cpp @@ -0,0 +1,75 @@ +#include "utest_helper.hpp" + +#define BUFFERSIZE 32*1024 +void runtime_barrier_list(void) +{ + const size_t n = BUFFERSIZE; + cl_int cpu_src[BUFFERSIZE]; + cl_int cpu_src_2[BUFFERSIZE]; + cl_event ev[5]; + cl_int status = 0; + cl_int value = 34; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_event"); + OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[1], 0, BUFFERSIZE*sizeof(int), NULL); + + for(cl_uint i=0; i= CL_SUBMITTED); + } + + + buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL); + + clEnqueueBarrierWithWaitList(queue, 0, NULL, &ev[3]); + + clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, BUFFERSIZE*sizeof(int), (void *)cpu_src_2, 0, NULL, &ev[4]); + + OCL_FINISH(); + clGetEventInfo(ev[4], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); + OCL_ASSERT(status != CL_COMPLETE); + + OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE); + + clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); + OCL_ASSERT(status == CL_COMPLETE); + + OCL_FINISH(); + + for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); 
++i) { +clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); +OCL_ASSERT(status <= CL_COMPLETE); + } + + for (uint32_t i = 0; i < n; ++i) { +OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3); + } + clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL); + + for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { +clReleaseEvent(ev[i]); + } +} + +MAKE_UTEST_FROM_FUNCTION(runtime_barrier_list); diff --git a/utests/runtime_marker_list.cpp b/utests/runtime_marker_list.cpp new file mode 100644 index 000..fc77156 --- /dev/null +++ b/utests/runtime_marker_list.cpp @@ -0,0 +1,75 @@ +#include "utest_helper.hpp" + +#define BUFFERSIZE 32*1024 +void runtime_marker_list(void) +{ + const size_t n = BUFFERSIZE; + cl_int cpu_src[BUFFERSIZE]; + cl_int cpu_src_2[BUFFERSIZE]; + cl_event ev[5]; + cl_int status = 0; + cl_int value = 34; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_event"); + OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[1], 0, BUFFERSIZE*sizeof(int), NULL); + + for(cl_uint i=0; i= CL_SUBMITTED); + } + + + buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL); + + clEnqueueMarkerWithWaitList(queue, 0, NULL, &ev[3]); + + clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, BUFFERSIZE*sizeof(int), (void *)cpu_src_2, 0, NULL, &ev[4]); + + OCL_FINISH(); + clGetEventInfo(ev[4], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); + OCL_ASSERT(status == CL_COMPLETE); + + OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE); + + clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); + OCL_ASSERT(status == CL_COMPLETE); + + OCL_FINISH(); + + for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { +clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); +OCL_ASSERT(status <= CL_COMPLETE); + } + + for (uint32_t i = 0; i < n; ++i) { 
+OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3); + } + clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL); + + for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { +clReleaseEvent(ev[i]); + } +} + +MAKE_UTEST_FROM_FUNCTION(runtime_marker_list); -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH V1 1/2] add [opencl-1.2] API clEnqueueBarrierWithWaitList.
From: Luo This command blocks command execution, that is, any following commands enqueued after it do not execute until it completes; API clEnqueueMarkerWithWaitList patch didn't push the latest, update in this patch. --- src/cl_api.c | 19 +- src/cl_command_queue.c | 66 +-- src/cl_command_queue.h | 9 ++- src/cl_enqueue.h | 1 + src/cl_event.c | 167 - src/cl_event.h | 6 ++ 6 files changed, 202 insertions(+), 66 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 9c22819..ad70b65 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2753,8 +2753,25 @@ clEnqueueBarrier(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); - cl_command_queue_set_barrier(command_queue); + cl_event_barrier_with_wait_list(command_queue, 0, NULL, NULL); + +error: + return err; +} + +cl_int +clEnqueueBarrierWithWaitList(cl_command_queue command_queue, +cl_uint num_events_in_wait_list, +const cl_event *event_wait_list, +cl_event *event) +{ + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); + + cl_event_barrier_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 6a699c0..c754ad5 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -492,18 +492,74 @@ cl_command_queue_remove_event(cl_command_queue queue, cl_event event) if(i == queue->wait_events_num) return; - if(queue->barrier_index >= i) -queue->barrier_index -= 1; - for(; iwait_events_num-1; i++) { queue->wait_events[i] = queue->wait_events[i+1]; } queue->wait_events_num -= 1; } +#define DEFAULT_WAIT_EVENTS_SIZE 16 LOCAL void -cl_command_queue_set_barrier(cl_command_queue queue) +cl_command_queue_insert_barrier_event(cl_command_queue queue, cl_event event) { -queue->barrier_index = queue->wait_events_num; + cl_int i=0; + cl_event *new_list; + + assert(queue != NULL); + 
if(queue->barrier_events == NULL) { +queue->barrier_events_size = DEFAULT_WAIT_EVENTS_SIZE; +TRY_ALLOC_NO_ERR (queue->barrier_events, CALLOC_ARRAY(cl_event, queue->barrier_events_size)); + } + + for(i=0; ibarrier_events_num; i++) { +if(queue->barrier_events[i] == event) + return; //is in the barrier_events, need to insert + } + + if(queue->barrier_events_num < queue->barrier_events_size) { +queue->barrier_events[queue->barrier_events_num++] = event; +return; + } + + //barrier_events_num == barrier_events_size, array is full + queue->barrier_events_size *= 2; + TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, queue->barrier_events_size)); + memcpy(new_list, queue->barrier_events, sizeof(cl_event)*queue->barrier_events_num); + cl_free(queue->barrier_events); + queue->barrier_events = new_list; + queue->barrier_events[queue->barrier_events_num++] = event; + return; + +exit: + return; +error: + if(queue->barrier_events) +cl_free(queue->barrier_events); + queue->barrier_events = NULL; + queue->barrier_events_size = 0; + queue->barrier_events_num = 0; + goto exit; + } +LOCAL void +cl_command_queue_remove_barrier_event(cl_command_queue queue, cl_event event) +{ + cl_int i=0; + + if(queue->barrier_events_num == 0) +return; + + for(i=0; ibarrier_events_num; i++) { +if(queue->barrier_events[i] == event) + break; + } + + if(i == queue->barrier_events_num) +return; + + for(; ibarrier_events_num-1; i++) { +queue->barrier_events[i] = queue->barrier_events[i+1]; + } + queue->barrier_events_num -= 1; +} diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h index 40c272c..b79d63a 100644 --- a/src/cl_command_queue.h +++ b/src/cl_command_queue.h @@ -34,10 +34,12 @@ struct _cl_command_queue { uint64_t magic; /* To identify it as a command queue */ volatile int ref_n; /* We reference count this object */ cl_context ctx; /* Its parent context */ + cl_event* barrier_events; /* Point to array of non-complete user events that block this command queue */ + cl_intbarrier_events_num; 
/* Number of Non-complete user events */ + cl_intbarrier_events_size; /* The size of array that wait_events point to */ cl_event* wait_events; /* Point to array of non-complete user events that block this command queue */ cl_intwait_events_num; /* Number of Non-complete user events */ cl_intwait_events_size; /* The size of array that wait_events point to */ - cl_intbarrier_index; /* Indicate event count in wait_events as barrier events */ cl_event last_event;/* The last event in the queue, for enqueue mark
[Beignet] [PATCH 1/3] [opencl-1.2] move enqueue_copy_image kernels outside of runtime code.
From: Luo separate the kernel code from host code to make it clean; build the kernels offline by gbe_bin_generator to improve the performance. --- src/CMakeLists.txt | 25 ++- src/cl_context.h | 16 +- src/cl_gt_device.h | 23 ++- src/cl_mem.c | 214 ++--- src/kernels/cl_internal_copy_buf_align1.cl | 8 - src/kernels/cl_internal_copy_buf_align16.cl| 2 +- src/kernels/cl_internal_copy_buf_align4.cl | 2 +- src/kernels/cl_internal_copy_buf_rect.cl | 15 ++ .../cl_internal_copy_buf_unalign_dst_offset.cl | 2 +- .../cl_internal_copy_buf_unalign_same_offset.cl| 2 +- .../cl_internal_copy_buf_unalign_src_offset.cl | 2 +- src/kernels/cl_internal_copy_buffer_to_image_2d.cl | 18 ++ src/kernels/cl_internal_copy_buffer_to_image_3d.cl | 19 ++ src/kernels/cl_internal_copy_image_2d_to_2d.cl | 21 ++ src/kernels/cl_internal_copy_image_2d_to_3d.cl | 22 +++ src/kernels/cl_internal_copy_image_2d_to_buffer.cl | 19 ++ src/kernels/cl_internal_copy_image_3d_to_2d.cl | 22 +++ src/kernels/cl_internal_copy_image_3d_to_3d.cl | 23 +++ src/kernels/cl_internal_copy_image_3d_to_buffer.cl | 22 +++ 19 files changed, 301 insertions(+), 176 deletions(-) delete mode 100644 src/kernels/cl_internal_copy_buf_align1.cl create mode 100644 src/kernels/cl_internal_copy_buf_rect.cl create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_2d.cl create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_buffer.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_buffer.cl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a3bac02..da7e1eb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,14 +17,33 @@ foreach (KF ${KERNEL_FILES}) endforeach (KF) endmacro
(MakeKernelBinStr) +macro (MakeBuiltInKernelStr KERNEL_PATH KERNEL_FILES) + set (output_file ${KERNEL_PATH}/${BUILT_IN_NAME}.cl) + set (file_content) + file (REMOVE ${output_file}) + foreach (KF ${KERNEL_NAMES}) +set (input_file ${KERNEL_PATH}/${KF}.cl) +file(READ ${input_file} file_content ) +STRING(REGEX REPLACE ";" ";" file_content "${file_content}") +file(APPEND ${output_file} ${file_content}) + endforeach (KF) +endmacro (MakeBuiltInKernelStr) + set (KERNEL_STR_FILES) -set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 +set (KERNEL_NAMES cl_internal_copy_buf_align4 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset -cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 -cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign +cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d +cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d +cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer +cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d +cl_internal_fill_buf_unalign cl_internal_fill_buf_align2 +cl_internal_fill_buf_align4 cl_internal_fill_buf_align8 cl_internal_fill_buf_align128) +set (BUILT_IN_NAME cl_internal_built_in_kernel) +MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") +MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${BUILT_IN_NAME}") set(OPENCL_SRC ${KERNEL_STR_FILES} diff --git a/src/cl_context.h b/src/cl_context.h index b2562ce..65b1728 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -47,14 +47,14 @@ enum _cl_internal_ker_type { CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, CL_ENQUEUE_COPY_BUFFER_RECT, - CL_ENQUEUE_COPY_IMAGE_0, //copy image 2d to image 2d - 
CL_ENQUEUE_COPY_IMAGE_1, //copy image 3d to image 2d - CL_ENQUEUE_COPY_IMAGE_2, //copy image 2d to image 3d - CL_ENQUEUE_COPY_IMAGE_3, //copy image 3d to image 3d - CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0, //copy image 2d to buffer - CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1, //copy image 3d tobuffer - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0, //copy buffer to image 2d - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1, //copy buffer to image 3d + CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d + CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d + CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to imag
[Beignet] [PATCH 2/3] add [opencl-1.2] API clCreateProgramWithBuiltInKernels.
From: Luo This API creates a built-in program object for a context, and loads the built-in kernels into this program object. --- backend/src/ir/image.cpp | 5 src/cl_api.c | 24 src/cl_context.c | 8 ++ src/cl_context.h | 2 ++ src/cl_program.c | 74 src/cl_program.h | 7 + 6 files changed, 120 insertions(+) diff --git a/backend/src/ir/image.cpp b/backend/src/ir/image.cpp index 8c34d70..87bafc0 100644 --- a/backend/src/ir/image.cpp +++ b/backend/src/ir/image.cpp @@ -125,7 +125,12 @@ namespace ir { void ImageSet::getData(struct ImageInfo *imageInfos) const { for(auto &it : regMap) + { +int t = it.second->idx - gbe_get_image_base_index(); +if(t < 0) + continue; imageInfos[it.second->idx - gbe_get_image_base_index()] = *it.second; + } } ImageSet::~ImageSet() { diff --git a/src/cl_api.c b/src/cl_api.c index 9c22819..0a1c4ab 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -816,6 +816,30 @@ error: *errcode_ret = err; return program; } + +cl_program +clCreateProgramWithBuiltInKernels(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * kernel_names, + cl_int * errcode_ret) +{ + cl_program program = NULL; + cl_int err = CL_SUCCESS; + + CHECK_CONTEXT (context); + INVALID_VALUE_IF (kernel_names == NULL); + program = cl_program_create_with_built_in_kernles(context, +num_devices, +device_list, +kernel_names, +&err); +error: + if (errcode_ret) +*errcode_ret = err; + return program; +} + cl_int clRetainProgram(cl_program program) { diff --git a/src/cl_context.c b/src/cl_context.c index 293af94..6172ecc 100644 --- a/src/cl_context.c +++ b/src/cl_context.c @@ -206,8 +206,16 @@ cl_context_delete(cl_context ctx) cl_program_delete(ctx->internal_prgs[i]); ctx->internal_prgs[i] = NULL; } + +if (ctx->internel_kernels[i]) { + cl_kernel_delete(ctx->built_in_kernels[i]); + ctx->built_in_kernels[i] = NULL; +} } + cl_program_delete(ctx->built_in_prgs); + ctx->built_in_prgs = NULL; + /* All object lists should have been freed. 
Otherwise, the reference counter * of the context cannot be 0 */ diff --git a/src/cl_context.h b/src/cl_context.h index 65b1728..cba0a0a 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -102,6 +102,8 @@ struct _cl_context { /* All programs internal used, for example clEnqueuexxx api use */ cl_kernel internel_kernels[CL_INTERNAL_KERNEL_MAX]; /* All kernels for clenqueuexxx api, for example clEnqueuexxx api use */ + cl_program built_in_prgs; /*all built-in kernels belongs to this program only*/ + cl_kernel built_in_kernels[CL_INTERNAL_KERNEL_MAX]; uint32_t ver; /* Gen version */ struct _cl_context_prop props; cl_context_properties * prop_user; /* a copy of user passed context properties when create context */ diff --git a/src/cl_program.c b/src/cl_program.c index 184d6b5..87a1e6b 100644 --- a/src/cl_program.c +++ b/src/cl_program.c @@ -209,6 +209,80 @@ error: } LOCAL cl_program +cl_program_create_with_built_in_kernles(cl_context ctx, + cl_uint num_devices, + const cl_device_id * devices, + const char * kernel_names, + cl_int * errcode_ret) +{ + cl_int err = CL_SUCCESS; + + assert(ctx); + INVALID_DEVICE_IF (num_devices != 1); + INVALID_DEVICE_IF (devices == NULL); + INVALID_DEVICE_IF (devices[0] != ctx->device); + + extern char cl_internal_built_in_kernel_str[]; + extern int cl_internal_built_in_kernel_str_size; + char* p_built_in_kernel_str =cl_internal_built_in_kernel_str; + cl_int binary_status = CL_SUCCESS; + + ctx->built_in_prgs = cl_program_create_from_binary(ctx, 1, + &ctx->device, + (size_t*)&cl_internal_built_in_kernel_str_size, + (const unsigned char **)&p_built_in_kernel_str, + &binary_status, &err); + + if (!ctx->built_in_prgs) +return NULL; + + err = cl_program_build(ctx->built_in_prgs, NULL); + if (err != CL_SUCCESS) +return NULL; + + ctx->built_in_prgs->is_built = 1; + + char
[Beignet] [PATCH 3/3] [opencl-1.2] add test case for API clCreateProgramWithBuiltInKernels.
From: Luo --- utests/CMakeLists.txt | 1 + utests/enqueue_built_in_kernels.cpp | 20 2 files changed, 21 insertions(+) create mode 100644 utests/enqueue_built_in_kernels.cpp diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 415dcb6..90585d9 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -176,6 +176,7 @@ set (utests_sources enqueue_copy_buf.cpp enqueue_copy_buf_unaligned.cpp enqueue_fill_buf.cpp + enqueue_built_in_kernels.cpp utest_assert.cpp utest.cpp utest_file_map.cpp diff --git a/utests/enqueue_built_in_kernels.cpp b/utests/enqueue_built_in_kernels.cpp new file mode 100644 index 000..8b47bca --- /dev/null +++ b/utests/enqueue_built_in_kernels.cpp @@ -0,0 +1,20 @@ +#include "utest_helper.hpp" + +void enqueue_built_in_kernels(void) +{ + char* built_in_kernel_names; + size_t built_in_kernels_size; + cl_int err = CL_SUCCESS; + size_t ret_sz; + + + OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, &built_in_kernels_size); + built_in_kernel_names = (char* )malloc(built_in_kernels_size * sizeof(char) ); + OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, built_in_kernels_size, (void*)built_in_kernel_names, &ret_sz); + OCL_ASSERT(ret_sz == built_in_kernels_size); + cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, &device, built_in_kernel_names, &err); + OCL_ASSERT(built_in_prog != NULL); + +} + +MAKE_UTEST_FROM_FUNCTION(enqueue_built_in_kernels); -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 1/2] fix event related bugs.
From: Luo 1. remove repeated user events in list. 2. missed braces in loops. 3. fix barrier event reference not increased. --- src/cl_alloc.c | 1 + src/cl_event.c | 111 - src/cl_event.h | 4 +++ 3 files changed, 75 insertions(+), 41 deletions(-) diff --git a/src/cl_alloc.c b/src/cl_alloc.c index 20d5578..93d2e6a 100644 --- a/src/cl_alloc.c +++ b/src/cl_alloc.c @@ -71,6 +71,7 @@ cl_free(void *ptr) return; atomic_dec(&cl_alloc_n); free(ptr); + ptr = NULL; } LOCAL size_t diff --git a/src/cl_event.c b/src/cl_event.c index 727ee1f..101e735 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -231,6 +231,9 @@ cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *eve } if(queue && queue->barrier_index > 0) { +for(j=0; j<queue->wait_events_num; j++){ + cl_event_add_ref(queue->wait_events[j]); //add defer enqueue's wait event reference + } return CL_ENQUEUE_EXECUTE_DEFER; } @@ -258,9 +261,10 @@ void cl_event_new_enqueue_callback(cl_event event, user_event *user_events, *u_ev; cl_command_queue queue = event->queue; cl_int i; + cl_int err = CL_SUCCESS; GET_QUEUE_THREAD_GPGPU(data->queue); - /* Allocate and inialize the structure itself */ + /* Allocate and initialize the structure itself */ TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); cb->num_events = num_events_in_wait_list; TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list)); @@ -276,22 +280,20 @@ void cl_event_new_enqueue_callback(cl_event event, node = queue->wait_events[i]->waits_head; if(node == NULL) queue->wait_events[i]->waits_head = cb; - else + else{ while((node != cb) && node->next) node = node->next; if(node == cb) //wait on dup user event continue; node->next = cb; + } /* Insert the user event to enqueue_callback's wait_user_events */ - TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); - u_ev->event = queue->wait_events[i]; - u_ev->next = cb->wait_user_events; - cb->wait_user_events = u_ev; + TRY(cl_event_insert_user_event, &cb->wait_user_events, queue->wait_events[i]); } 
} - /* Find out all user events that events in event_wait_list wait */ + /* Find out all user events that in event_wait_list wait */ for(i=0; istatus <= CL_COMPLETE) continue; @@ -309,31 +311,29 @@ void cl_event_new_enqueue_callback(cl_event event, node->next = cb; } /* Insert the user event to enqueue_callback's wait_user_events */ - TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); - u_ev->event = event_wait_list[i]; - u_ev->next = cb->wait_user_events; - cb->wait_user_events = u_ev; + TRY(cl_event_insert_user_event, &cb->wait_user_events, event_wait_list[i]); cl_command_queue_insert_event(event->queue, event_wait_list[i]); } else if(event_wait_list[i]->enqueue_cb != NULL) { user_events = event_wait_list[i]->enqueue_cb->wait_user_events; while(user_events != NULL) { /* Insert the enqueue_callback to user event's waits_tail */ node = user_events->event->waits_head; -while((node != cb) && node->next) - node = node->next; -if(node == cb) { //wait on dup user event - user_events = user_events->next; - continue; +if(node == NULL) + event_wait_list[i]->waits_head = cb; +else{ + while((node != cb) && node->next) +node = node->next; + if(node == cb) { //wait on dup user event +user_events = user_events->next; +continue; + } + node->next = cb; } -node->next = cb; /* Insert the user event to enqueue_callback's wait_user_events */ -TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); -u_ev->event = user_events->event; -u_ev->next = cb->wait_user_events; -cb->wait_user_events = u_ev; +TRY(cl_event_insert_user_event, &cb->wait_user_events, user_events->event); +cl_command_queue_insert_event(event->queue, user_events->event); user_events = user_events->next; -cl_command_queue_insert_event(event->queue, event_wait_list[i]); } } } @@ -363,7 +363,6 @@ error: void cl_event_set_status(cl_event event, cl_int status) { user_callback *user_cb; - user_event*u_ev, *u_ev_next; cl_int ret, i; cl_event evt; @@ -419,23 +418,8 @@ void cl_event_set_status(cl_event event, cl_int status) /* Check all defer 
enqueue */ enqueue_callback *cb, *enqueue_cb = event->waits_head; while(enqueue_cb) { -/* Remove this user event in enqueue_cb */ -while(enqueue_cb->wait_user_events && - enqueue_cb->wait_user_events->event == event) { - u_ev = enqueue_cb->wait_user_events; - enqueue_cb->wait_user_events = enqueue_cb->wait_user_e
[Beignet] [PATCH 2/2] move enqueue_copy_image kernels outside of runtime code.
From: Luo separate the kernel code from host code to make it clean; build the kernels offline by gbe_bin_generator to improve the performance. --- src/CMakeLists.txt | 23 ++- src/cl_context.h | 24 ++- src/cl_gt_device.h | 23 ++- src/cl_mem.c | 214 ++--- src/kernels/cl_internal_copy_buf_align1.cl | 8 - src/kernels/cl_internal_copy_buf_align16.cl| 2 +- src/kernels/cl_internal_copy_buf_align4.cl | 2 +- src/kernels/cl_internal_copy_buf_rect.cl | 15 ++ .../cl_internal_copy_buf_unalign_dst_offset.cl | 2 +- .../cl_internal_copy_buf_unalign_same_offset.cl| 2 +- .../cl_internal_copy_buf_unalign_src_offset.cl | 2 +- src/kernels/cl_internal_copy_buffer_to_image_2d.cl | 18 ++ src/kernels/cl_internal_copy_buffer_to_image_3d.cl | 19 ++ src/kernels/cl_internal_copy_image_2d_to_2d.cl | 21 ++ src/kernels/cl_internal_copy_image_2d_to_3d.cl | 22 +++ src/kernels/cl_internal_copy_image_2d_to_buffer.cl | 19 ++ src/kernels/cl_internal_copy_image_3d_to_2d.cl | 22 +++ src/kernels/cl_internal_copy_image_3d_to_3d.cl | 23 +++ src/kernels/cl_internal_copy_image_3d_to_buffer.cl | 22 +++ 19 files changed, 308 insertions(+), 175 deletions(-) delete mode 100644 src/kernels/cl_internal_copy_buf_align1.cl create mode 100644 src/kernels/cl_internal_copy_buf_rect.cl create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_2d.cl create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_buffer.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_buffer.cl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8164a44..ecc04ab 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,11 +17,30 @@ foreach (KF ${KERNEL_FILES}) endforeach (KF) 
endmacro (MakeKernelBinStr) +macro (MakeBuiltInKernelStr KERNEL_PATH KERNEL_FILES) + set (output_file ${KERNEL_PATH}/${BUILT_IN_NAME}.cl) + set (file_content) + file (REMOVE ${output_file}) + foreach (KF ${KERNEL_NAMES}) +set (input_file ${KERNEL_PATH}/${KF}.cl) +file(READ ${input_file} file_content ) +STRING(REGEX REPLACE ";" ";" file_content "${file_content}") +file(APPEND ${output_file} ${file_content}) + endforeach (KF) +endmacro (MakeBuiltInKernelStr) + set (KERNEL_STR_FILES) -set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 +set (KERNEL_NAMES cl_internal_copy_buf_align4 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset -cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset) +cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset +cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d +cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d +cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer +cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d) +set (BUILT_IN_NAME cl_internal_built_in_kernel) +MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") +MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${BUILT_IN_NAME}") set(OPENCL_SRC ${KERNEL_STR_FILES} diff --git a/src/cl_context.h b/src/cl_context.h index 782a9af..24281be 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -46,14 +46,22 @@ enum _cl_internal_ker_type { CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, CL_ENQUEUE_COPY_BUFFER_RECT, - CL_ENQUEUE_COPY_IMAGE_0, //copy image 2d to image 2d - CL_ENQUEUE_COPY_IMAGE_1, //copy image 3d to image 2d - CL_ENQUEUE_COPY_IMAGE_2, //copy image 2d to image 3d - CL_ENQUEUE_COPY_IMAGE_3, //copy image 3d to image 3d - 
CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0, //copy image 2d to buffer - CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1, //copy image 3d tobuffer - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0, //copy buffer to image 2d - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1, //copy buffer to image 3d + CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d + CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d + CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d + CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d + CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer + CL_ENQUEUE_COPY_IMAGE_3D_TO
[Beignet] [PATCH V1 1/2] fix event related bugs.
From: Luo 1. remove repeated user events in list. 2. missed braces in loops. 3. fix barrier event reference not increased. --- src/cl_alloc.c | 1 + src/cl_event.c | 111 - src/cl_event.h | 4 +++ 3 files changed, 75 insertions(+), 41 deletions(-) diff --git a/src/cl_alloc.c b/src/cl_alloc.c index 20d5578..93d2e6a 100644 --- a/src/cl_alloc.c +++ b/src/cl_alloc.c @@ -71,6 +71,7 @@ cl_free(void *ptr) return; atomic_dec(&cl_alloc_n); free(ptr); + ptr = NULL; } LOCAL size_t diff --git a/src/cl_event.c b/src/cl_event.c index 727ee1f..101e735 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -231,6 +231,9 @@ cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *eve } if(queue && queue->barrier_index > 0) { +for(j=0; j<queue->wait_events_num; j++){ + cl_event_add_ref(queue->wait_events[j]); //add defer enqueue's wait event reference + } return CL_ENQUEUE_EXECUTE_DEFER; } @@ -258,9 +261,10 @@ void cl_event_new_enqueue_callback(cl_event event, user_event *user_events, *u_ev; cl_command_queue queue = event->queue; cl_int i; + cl_int err = CL_SUCCESS; GET_QUEUE_THREAD_GPGPU(data->queue); - /* Allocate and inialize the structure itself */ + /* Allocate and initialize the structure itself */ TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); cb->num_events = num_events_in_wait_list; TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list)); @@ -276,22 +280,20 @@ void cl_event_new_enqueue_callback(cl_event event, node = queue->wait_events[i]->waits_head; if(node == NULL) queue->wait_events[i]->waits_head = cb; - else + else{ while((node != cb) && node->next) node = node->next; if(node == cb) //wait on dup user event continue; node->next = cb; + } /* Insert the user event to enqueue_callback's wait_user_events */ - TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); - u_ev->event = queue->wait_events[i]; - u_ev->next = cb->wait_user_events; - cb->wait_user_events = u_ev; + TRY(cl_event_insert_user_event, &cb->wait_user_events, queue->wait_events[i]); } 
} - /* Find out all user events that events in event_wait_list wait */ + /* Find out all user events that in event_wait_list wait */ for(i=0; istatus <= CL_COMPLETE) continue; @@ -309,31 +311,29 @@ void cl_event_new_enqueue_callback(cl_event event, node->next = cb; } /* Insert the user event to enqueue_callback's wait_user_events */ - TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); - u_ev->event = event_wait_list[i]; - u_ev->next = cb->wait_user_events; - cb->wait_user_events = u_ev; + TRY(cl_event_insert_user_event, &cb->wait_user_events, event_wait_list[i]); cl_command_queue_insert_event(event->queue, event_wait_list[i]); } else if(event_wait_list[i]->enqueue_cb != NULL) { user_events = event_wait_list[i]->enqueue_cb->wait_user_events; while(user_events != NULL) { /* Insert the enqueue_callback to user event's waits_tail */ node = user_events->event->waits_head; -while((node != cb) && node->next) - node = node->next; -if(node == cb) { //wait on dup user event - user_events = user_events->next; - continue; +if(node == NULL) + event_wait_list[i]->waits_head = cb; +else{ + while((node != cb) && node->next) +node = node->next; + if(node == cb) { //wait on dup user event +user_events = user_events->next; +continue; + } + node->next = cb; } -node->next = cb; /* Insert the user event to enqueue_callback's wait_user_events */ -TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); -u_ev->event = user_events->event; -u_ev->next = cb->wait_user_events; -cb->wait_user_events = u_ev; +TRY(cl_event_insert_user_event, &cb->wait_user_events, user_events->event); +cl_command_queue_insert_event(event->queue, user_events->event); user_events = user_events->next; -cl_command_queue_insert_event(event->queue, event_wait_list[i]); } } } @@ -363,7 +363,6 @@ error: void cl_event_set_status(cl_event event, cl_int status) { user_callback *user_cb; - user_event*u_ev, *u_ev_next; cl_int ret, i; cl_event evt; @@ -419,23 +418,8 @@ void cl_event_set_status(cl_event event, cl_int status) /* Check all defer 
enqueue */ enqueue_callback *cb, *enqueue_cb = event->waits_head; while(enqueue_cb) { -/* Remove this user event in enqueue_cb */ -while(enqueue_cb->wait_user_events && - enqueue_cb->wait_user_events->event == event) { - u_ev = enqueue_cb->wait_user_events; - enqueue_cb->wait_user_events = enqueue_cb->wait_user_e
[Beignet] [PATCH V1 2/2] move enqueue_copy_image kernels outside of runtime code.
From: Luo separate the kernel code from host code to make it clean; build the kernels offline by gbe_bin_generator to improve the performance. --- src/CMakeLists.txt | 23 ++- src/cl_context.h | 16 +- src/cl_mem.c | 214 ++--- src/kernels/cl_internal_copy_buf_align1.cl | 8 - src/kernels/cl_internal_copy_buf_align16.cl| 2 +- src/kernels/cl_internal_copy_buf_align4.cl | 2 +- src/kernels/cl_internal_copy_buf_rect.cl | 15 ++ .../cl_internal_copy_buf_unalign_dst_offset.cl | 2 +- .../cl_internal_copy_buf_unalign_same_offset.cl| 2 +- .../cl_internal_copy_buf_unalign_src_offset.cl | 2 +- src/kernels/cl_internal_copy_buffer_to_image_2d.cl | 18 ++ src/kernels/cl_internal_copy_buffer_to_image_3d.cl | 19 ++ src/kernels/cl_internal_copy_image_2d_to_2d.cl | 21 ++ src/kernels/cl_internal_copy_image_2d_to_3d.cl | 22 +++ src/kernels/cl_internal_copy_image_2d_to_buffer.cl | 19 ++ src/kernels/cl_internal_copy_image_3d_to_2d.cl | 22 +++ src/kernels/cl_internal_copy_image_3d_to_3d.cl | 23 +++ src/kernels/cl_internal_copy_image_3d_to_buffer.cl | 22 +++ 18 files changed, 278 insertions(+), 174 deletions(-) delete mode 100644 src/kernels/cl_internal_copy_buf_align1.cl create mode 100644 src/kernels/cl_internal_copy_buf_rect.cl create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_2d.cl create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_buffer.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_buffer.cl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8164a44..ecc04ab 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,11 +17,30 @@ foreach (KF ${KERNEL_FILES}) endforeach (KF) endmacro (MakeKernelBinStr) +macro 
(MakeBuiltInKernelStr KERNEL_PATH KERNEL_FILES) + set (output_file ${KERNEL_PATH}/${BUILT_IN_NAME}.cl) + set (file_content) + file (REMOVE ${output_file}) + foreach (KF ${KERNEL_NAMES}) +set (input_file ${KERNEL_PATH}/${KF}.cl) +file(READ ${input_file} file_content ) +STRING(REGEX REPLACE ";" ";" file_content "${file_content}") +file(APPEND ${output_file} ${file_content}) + endforeach (KF) +endmacro (MakeBuiltInKernelStr) + set (KERNEL_STR_FILES) -set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 +set (KERNEL_NAMES cl_internal_copy_buf_align4 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset -cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset) +cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset +cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d +cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d +cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer +cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d) +set (BUILT_IN_NAME cl_internal_built_in_kernel) +MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") +MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${BUILT_IN_NAME}") set(OPENCL_SRC ${KERNEL_STR_FILES} diff --git a/src/cl_context.h b/src/cl_context.h index 782a9af..82d3217 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -46,14 +46,14 @@ enum _cl_internal_ker_type { CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, CL_ENQUEUE_COPY_BUFFER_RECT, - CL_ENQUEUE_COPY_IMAGE_0, //copy image 2d to image 2d - CL_ENQUEUE_COPY_IMAGE_1, //copy image 3d to image 2d - CL_ENQUEUE_COPY_IMAGE_2, //copy image 2d to image 3d - CL_ENQUEUE_COPY_IMAGE_3, //copy image 3d to image 3d - CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0, //copy image 2d to buffer 
- CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1, //copy image 3d tobuffer - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0, //copy buffer to image 2d - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1, //copy buffer to image 3d + CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d + CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d + CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d + CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d + CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer + CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer + CL_ENQUEUE_COPY_BUFFER_T
[Beignet] [PATCH] add [opencl-1.2] API clCreateSubDevices.
From: Luo creates an array of sub-devices that each reference a non-intersecting set of compute units within in_device, according to a partition scheme given by properties. --- src/cl_api.c | 10 -- src/cl_device_id.c | 6 ++ src/cl_device_id.h | 7 +++ src/cl_gt_device.h | 7 ++- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 9c22819..2077d02 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -242,8 +242,14 @@ clCreateSubDevices(cl_device_id in_device, cl_device_id * out_devices, cl_uint *num_devices_ret) { - NOT_IMPLEMENTED; - return 0; + /* Check parameter consistency */ + if (UNLIKELY(out_devices == NULL && num_devices_ret == NULL)) +return CL_INVALID_VALUE; + if (UNLIKELY(in_device == NULL && properties == NULL)) +return CL_INVALID_VALUE; + + *num_devices_ret = 0; + return CL_SUCCESS; } cl_int diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 2b443c6..37f49be 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -346,6 +346,12 @@ cl_get_device_info(cl_device_id device, DECL_STRING_FIELD(OPENCL_C_VERSION, opencl_c_version) DECL_STRING_FIELD(EXTENSIONS, extensions); DECL_STRING_FIELD(BUILT_IN_KERNELS, built_in_kernels) +DECL_FIELD(PARENT_DEVICE, parent_device) +DECL_FIELD(PARTITION_MAX_SUB_DEVICES, partition_max_sub_device) +DECL_FIELD(PARTITION_PROPERTIES, partition_property) +DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain) +DECL_FIELD(PARTITION_TYPE, partition_type) +DECL_FIELD(REFERENCE_COUNT, device_reference_count) case CL_DRIVER_VERSION: if (param_value_size_ret) { diff --git a/src/cl_device_id.h b/src/cl_device_id.h index 5f7c9fe..6f8d25f 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -97,6 +97,13 @@ struct _cl_device_id { /* Kernel specific info that we're assigning statically */ size_t wg_sz; size_t preferred_wg_sz_mul; + /* SubDevice specific info */ + cl_device_id parent_device; + cl_uint partition_max_sub_device; + cl_device_partition_property partition_property[3]; + 
cl_device_affinity_domainaffinity_domain; + cl_device_partition_property partition_type[3]; + cl_uint device_reference_count; }; /* Get a device from the given platform */ diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index 110988a..88decd7 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -78,5 +78,10 @@ DECL_INFO_STRING(extensions, "") DECL_INFO_STRING(built_in_kernels, "") DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING) #undef DECL_INFO_STRING - +.parent_device = NULL, +.partition_max_sub_device = 1, +.partition_property = {0}, +.affinity_domain = 0, +.partition_type = {0}, +.device_reference_count = 1, -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] remove the code of saving the llvm bitcode to file, replace it with llvm::Module pointer.
From: Luo Save the Act and module pointer to GenProgram, delete it in the destructor. --- backend/src/backend/gen_program.cpp | 31 +--- backend/src/backend/gen_program.hpp | 4 +++- backend/src/backend/program.cpp | 47 + backend/src/backend/program.h | 3 ++- backend/src/backend/program.hpp | 2 +- backend/src/llvm/llvm_to_gen.cpp| 12 ++ backend/src/llvm/llvm_to_gen.hpp| 2 +- src/cl_program.c| 2 +- 8 files changed, 55 insertions(+), 48 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 52db904..a311c71 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -22,6 +22,17 @@ * \author Benjamin Segovia */ +#include "llvm/Config/config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DataLayout.h" +#else +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DataLayout.h" +#endif /* LLVM_VERSION_MINOR <= 2 */ + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -33,6 +44,8 @@ #include "ir/unit.hpp" #include "llvm/llvm_to_gen.hpp" +#include + #include #include #include @@ -72,7 +85,17 @@ namespace gbe { fclose(f); } - GenProgram::~GenProgram(void) {} + GenProgram::~GenProgram(void){ +if(module){ + delete (llvm::Module*)module; + module = NULL; +} + +if(Act){ + delete (clang::CodeGenAction*)Act; + Act = NULL; +} + } /*! 
We must avoid spilling at all cost with Gen */ static const struct CodeGenStrategy { @@ -177,16 +200,18 @@ namespace gbe { static gbe_program genProgramNewFromLLVM(uint32_t deviceID, const char *fileName, + const void* module, + const void* act, size_t stringSize, char *err, size_t *errSize, int optLevel) { using namespace gbe; -GenProgram *program = GBE_NEW(GenProgram, deviceID); +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, act); std::string error; // Try to compile the program -if (program->buildFromLLVMFile(fileName, error, optLevel) == false) { +if (program->buildFromLLVMFile(fileName, module, error, optLevel) == false) { if (err != NULL && errSize != NULL && stringSize > 0u) { const size_t msgSize = std::min(error.size(), stringSize-1u); std::memcpy(err, error.c_str(), msgSize); diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp index ea54b49..b17dfc8 100644 --- a/backend/src/backend/gen_program.hpp +++ b/backend/src/backend/gen_program.hpp @@ -58,7 +58,7 @@ namespace gbe { public: /*! Create an empty program */ -GenProgram(uint32_t deviceID) : deviceID(deviceID) {} +GenProgram(uint32_t deviceID, const void* mod = NULL, const void* act = NULL) : deviceID(deviceID),module((void*)mod), Act((void*)act) {} /*! Current device ID*/ uint32_t deviceID; /*! Destroy the program */ @@ -69,6 +69,8 @@ namespace gbe virtual Kernel *allocateKernel(const std::string &name) { return GBE_NEW(GenKernel, name); } +void* module; +void* Act; /*! 
Use custom allocators */ GBE_CLASS(GenProgram); }; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index bdc7d34..6745d70 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -34,6 +34,7 @@ #include "llvm/Config/config.h" #include "llvm/Support/Threading.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Transforms/Utils/Cloning.h" #include #include #include @@ -102,9 +103,9 @@ namespace gbe { BVAR(OCL_OUTPUT_GEN_IR, false); - bool Program::buildFromLLVMFile(const char *fileName, std::string &error, int optLevel) { + bool Program::buildFromLLVMFile(const char *fileName, const void* module, std::string &error, int optLevel) { ir::Unit *unit = new ir::Unit(); -if (llvmToGen(*unit, fileName, optLevel) == false) { +if (llvmToGen(*unit, fileName, module, optLevel) == false) { error = std::string(fileName) + " not found"; return false; } @@ -113,7 +114,7 @@ namespace gbe { if(!unit->getValid()) { delete unit; //clear unit unit = new ir::Unit(); - llvmToGen(*unit, fileName, 0); //suppose file exists and llvmToGen will not return false. + llvmToGen(*unit, fileName, module, 0); //suppose file exists and llvmToGen will not return false. } assert(unit->getValid()); this->buildFro
[Beignet] [PATCH] remove the code of saving the llvm bitcode to file, replace it with llvm::Module pointer.
From: Luo Save the Act and module pointer to GenProgram, delete it in the destructor. --- backend/src/backend/gen_program.cpp | 31 +--- backend/src/backend/gen_program.hpp | 4 +++- backend/src/backend/program.cpp | 47 + backend/src/backend/program.h | 3 ++- backend/src/backend/program.hpp | 2 +- backend/src/llvm/llvm_to_gen.cpp| 12 ++ backend/src/llvm/llvm_to_gen.hpp| 2 +- src/cl_program.c| 2 +- 8 files changed, 55 insertions(+), 48 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 52db904..a311c71 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -22,6 +22,17 @@ * \author Benjamin Segovia */ +#include "llvm/Config/config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DataLayout.h" +#else +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DataLayout.h" +#endif /* LLVM_VERSION_MINOR <= 2 */ + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -33,6 +44,8 @@ #include "ir/unit.hpp" #include "llvm/llvm_to_gen.hpp" +#include + #include #include #include @@ -72,7 +85,17 @@ namespace gbe { fclose(f); } - GenProgram::~GenProgram(void) {} + GenProgram::~GenProgram(void){ +if(module){ + delete (llvm::Module*)module; + module = NULL; +} + +if(Act){ + delete (clang::CodeGenAction*)Act; + Act = NULL; +} + } /*! 
We must avoid spilling at all cost with Gen */ static const struct CodeGenStrategy { @@ -177,16 +200,18 @@ namespace gbe { static gbe_program genProgramNewFromLLVM(uint32_t deviceID, const char *fileName, + const void* module, + const void* act, size_t stringSize, char *err, size_t *errSize, int optLevel) { using namespace gbe; -GenProgram *program = GBE_NEW(GenProgram, deviceID); +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, act); std::string error; // Try to compile the program -if (program->buildFromLLVMFile(fileName, error, optLevel) == false) { +if (program->buildFromLLVMFile(fileName, module, error, optLevel) == false) { if (err != NULL && errSize != NULL && stringSize > 0u) { const size_t msgSize = std::min(error.size(), stringSize-1u); std::memcpy(err, error.c_str(), msgSize); diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp index ea54b49..b17dfc8 100644 --- a/backend/src/backend/gen_program.hpp +++ b/backend/src/backend/gen_program.hpp @@ -58,7 +58,7 @@ namespace gbe { public: /*! Create an empty program */ -GenProgram(uint32_t deviceID) : deviceID(deviceID) {} +GenProgram(uint32_t deviceID, const void* mod = NULL, const void* act = NULL) : deviceID(deviceID),module((void*)mod), Act((void*)act) {} /*! Current device ID*/ uint32_t deviceID; /*! Destroy the program */ @@ -69,6 +69,8 @@ namespace gbe virtual Kernel *allocateKernel(const std::string &name) { return GBE_NEW(GenKernel, name); } +void* module; +void* Act; /*! 
Use custom allocators */ GBE_CLASS(GenProgram); }; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index bdc7d34..6745d70 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -34,6 +34,7 @@ #include "llvm/Config/config.h" #include "llvm/Support/Threading.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Transforms/Utils/Cloning.h" #include #include #include @@ -102,9 +103,9 @@ namespace gbe { BVAR(OCL_OUTPUT_GEN_IR, false); - bool Program::buildFromLLVMFile(const char *fileName, std::string &error, int optLevel) { + bool Program::buildFromLLVMFile(const char *fileName, const void* module, std::string &error, int optLevel) { ir::Unit *unit = new ir::Unit(); -if (llvmToGen(*unit, fileName, optLevel) == false) { +if (llvmToGen(*unit, fileName, module, optLevel) == false) { error = std::string(fileName) + " not found"; return false; } @@ -113,7 +114,7 @@ namespace gbe { if(!unit->getValid()) { delete unit; //clear unit unit = new ir::Unit(); - llvmToGen(*unit, fileName, 0); //suppose file exists and llvmToGen will not return false. + llvmToGen(*unit, fileName, module, 0); //suppose file exists and llvmToGen will not return false. } assert(unit->getValid()); this->buildFro
[Beignet] [fix merge issue 1/2] add [opencl-1.2] API clCreateProgramWithBuiltInKernels.
From: Luo This API creates a built-in program object for a context, and loads the built-in kernels into this program object. --- backend/src/ir/image.cpp | 5 src/cl_api.c | 24 src/cl_context.c | 8 ++ src/cl_context.h | 2 ++ src/cl_gt_device.h | 24 +++- src/cl_program.c | 74 src/cl_program.h | 7 + 7 files changed, 143 insertions(+), 1 deletion(-) diff --git a/backend/src/ir/image.cpp b/backend/src/ir/image.cpp index 8c34d70..87bafc0 100644 --- a/backend/src/ir/image.cpp +++ b/backend/src/ir/image.cpp @@ -125,7 +125,12 @@ namespace ir { void ImageSet::getData(struct ImageInfo *imageInfos) const { for(auto &it : regMap) + { +int t = it.second->idx - gbe_get_image_base_index(); +if(t < 0) + continue; imageInfos[it.second->idx - gbe_get_image_base_index()] = *it.second; + } } ImageSet::~ImageSet() { diff --git a/src/cl_api.c b/src/cl_api.c index 4b1deda..3a77dcd 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -816,6 +816,30 @@ error: *errcode_ret = err; return program; } + +cl_program +clCreateProgramWithBuiltInKernels(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * kernel_names, + cl_int * errcode_ret) +{ + cl_program program = NULL; + cl_int err = CL_SUCCESS; + + CHECK_CONTEXT (context); + INVALID_VALUE_IF (kernel_names == NULL); + program = cl_program_create_with_built_in_kernles(context, +num_devices, +device_list, +kernel_names, +&err); +error: + if (errcode_ret) +*errcode_ret = err; + return program; +} + cl_int clRetainProgram(cl_program program) { diff --git a/src/cl_context.c b/src/cl_context.c index 293af94..6172ecc 100644 --- a/src/cl_context.c +++ b/src/cl_context.c @@ -206,8 +206,16 @@ cl_context_delete(cl_context ctx) cl_program_delete(ctx->internal_prgs[i]); ctx->internal_prgs[i] = NULL; } + +if (ctx->internel_kernels[i]) { + cl_kernel_delete(ctx->built_in_kernels[i]); + ctx->built_in_kernels[i] = NULL; +} } + cl_program_delete(ctx->built_in_prgs); + ctx->built_in_prgs = NULL; + /* All object lists should have 
been freed. Otherwise, the reference counter * of the context cannot be 0 */ diff --git a/src/cl_context.h b/src/cl_context.h index 4de954c..e037634 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -103,6 +103,8 @@ struct _cl_context { /* All programs internal used, for example clEnqueuexxx api use */ cl_kernel internel_kernels[CL_INTERNAL_KERNEL_MAX]; /* All kernels for clenqueuexxx api, for example clEnqueuexxx api use */ + cl_program built_in_prgs; /*all built-in kernels belongs to this program only*/ + cl_kernel built_in_kernels[CL_INTERNAL_KERNEL_MAX]; uint32_t ver; /* Gen version */ struct _cl_context_prop props; cl_context_properties * prop_user; /* a copy of user passed context properties when create context */ diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index 7e45b4e..3e2502c 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -75,7 +75,29 @@ DECL_INFO_STRING(version, LIBCL_VERSION_STRING) DECL_INFO_STRING(profile, "FULL_PROFILE") DECL_INFO_STRING(opencl_c_version, LIBCL_C_VERSION_STRING) DECL_INFO_STRING(extensions, "") -DECL_INFO_STRING(built_in_kernels, "") +DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;" + "__cl_copy_region_align16;" + "__cl_cpy_region_unalign_same_offset;" + "__cl_copy_region_unalign_dst_offset;" + "__cl_copy_region_unalign_src_offset;" + "__cl_copy_buffer_rect;" + "__cl_copy_image_2d_to_2d;" + "__cl_copy_image_3d_to_2d;" + "__cl_copy_image_2d_to_3d;" + "__cl_copy_image_3d_to_3d;" + "__cl_copy_image_2d_to_buffer;" + "__cl_copy_image_3d_to_buffer;" + "__cl_copy_buffer_to_image_2d;" + "__cl_copy_buffer_to_image_3d;" + "__cl_fill_region_unalign;" + "__cl_fill_region_align2;" +
[Beignet] [fix merge issue 2/2] add [opencl-1.2] test case for API clCreateProgramWithBuiltInKernels.
From: Luo --- utests/CMakeLists.txt | 1 + utests/enqueue_built_in_kernels.cpp | 20 2 files changed, 21 insertions(+) create mode 100644 utests/enqueue_built_in_kernels.cpp diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index cc8c497..5f0649f 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -176,6 +176,7 @@ set (utests_sources enqueue_copy_buf.cpp enqueue_copy_buf_unaligned.cpp enqueue_fill_buf.cpp + enqueue_built_in_kernels.cpp utest_assert.cpp utest.cpp utest_file_map.cpp diff --git a/utests/enqueue_built_in_kernels.cpp b/utests/enqueue_built_in_kernels.cpp new file mode 100644 index 000..8b47bca --- /dev/null +++ b/utests/enqueue_built_in_kernels.cpp @@ -0,0 +1,20 @@ +#include "utest_helper.hpp" + +void enqueue_built_in_kernels(void) +{ + char* built_in_kernel_names; + size_t built_in_kernels_size; + cl_int err = CL_SUCCESS; + size_t ret_sz; + + + OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, &built_in_kernels_size); + built_in_kernel_names = (char* )malloc(built_in_kernels_size * sizeof(char) ); + OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, built_in_kernels_size, (void*)built_in_kernel_names, &ret_sz); + OCL_ASSERT(ret_sz == built_in_kernels_size); + cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, &device, built_in_kernel_names, &err); + OCL_ASSERT(built_in_prog != NULL); + +} + +MAKE_UTEST_FROM_FUNCTION(enqueue_built_in_kernels); -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH V2] remove the code of saving the llvm bitcode to file, replace it with llvm::Module
From: Luo Save the LLVMContext and module pointer to GenProgram, delete it in the destructor. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 31 --- backend/src/backend/gen_program.hpp | 4 ++- backend/src/backend/program.cpp | 50 - backend/src/backend/program.h | 3 ++- backend/src/backend/program.hpp | 2 +- backend/src/llvm/llvm_to_gen.cpp| 16 +++- backend/src/llvm/llvm_to_gen.hpp| 2 +- src/cl_program.c| 2 +- 8 files changed, 62 insertions(+), 48 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 52db904..74b6fa1 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -22,6 +22,17 @@ * \author Benjamin Segovia */ +#include "llvm/Config/config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DataLayout.h" +#else +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DataLayout.h" +#endif /* LLVM_VERSION_MINOR <= 2 */ + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -33,6 +44,8 @@ #include "ir/unit.hpp" #include "llvm/llvm_to_gen.hpp" +#include + #include #include #include @@ -72,7 +85,17 @@ namespace gbe { fclose(f); } - GenProgram::~GenProgram(void) {} + GenProgram::~GenProgram(void){ +if(module){ + delete (llvm::Module*)module; + module = NULL; +} + +if(llvm_ctx){ + delete (llvm::LLVMContext*)llvm_ctx; + llvm_ctx = NULL; +} + } /*! 
We must avoid spilling at all cost with Gen */ static const struct CodeGenStrategy { @@ -177,16 +200,18 @@ namespace gbe { static gbe_program genProgramNewFromLLVM(uint32_t deviceID, const char *fileName, + const void* module, + const void* llvm_ctx, size_t stringSize, char *err, size_t *errSize, int optLevel) { using namespace gbe; -GenProgram *program = GBE_NEW(GenProgram, deviceID); +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx); std::string error; // Try to compile the program -if (program->buildFromLLVMFile(fileName, error, optLevel) == false) { +if (program->buildFromLLVMFile(fileName, module, error, optLevel) == false) { if (err != NULL && errSize != NULL && stringSize > 0u) { const size_t msgSize = std::min(error.size(), stringSize-1u); std::memcpy(err, error.c_str(), msgSize); diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp index ea54b49..70794c9 100644 --- a/backend/src/backend/gen_program.hpp +++ b/backend/src/backend/gen_program.hpp @@ -58,7 +58,7 @@ namespace gbe { public: /*! Create an empty program */ -GenProgram(uint32_t deviceID) : deviceID(deviceID) {} +GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {} /*! Current device ID*/ uint32_t deviceID; /*! Destroy the program */ @@ -69,6 +69,8 @@ namespace gbe virtual Kernel *allocateKernel(const std::string &name) { return GBE_NEW(GenKernel, name); } +void* module; +void* llvm_ctx; /*! 
Use custom allocators */ GBE_CLASS(GenProgram); }; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index bdc7d34..66a5ce0 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -34,6 +34,8 @@ #include "llvm/Config/config.h" #include "llvm/Support/Threading.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IR/LLVMContext.h" #include #include #include @@ -102,9 +104,9 @@ namespace gbe { BVAR(OCL_OUTPUT_GEN_IR, false); - bool Program::buildFromLLVMFile(const char *fileName, std::string &error, int optLevel) { + bool Program::buildFromLLVMFile(const char *fileName, const void* module, std::string &error, int optLevel) { ir::Unit *unit = new ir::Unit(); -if (llvmToGen(*unit, fileName, optLevel) == false) { +if (llvmToGen(*unit, fileName, module, optLevel) == false) { error = std::string(fileName) + " not found"; return false; } @@ -113,7 +115,7 @@ namespace gbe { if(!unit->getValid()) { delete unit; //clear unit unit = new ir::Unit(); - llvmToGen(*unit, fileName, 0); //suppose file exists and llvmToGen will not return false. + llvmToGen(*unit, fileName, module, 0); //suppose file exists and
[Beignet] [PATCH V1] remove the code of saving the llvm bitcode to file, replace it with llvm::Module
From: Luo Save the global LLVMContext and module pointer to GenProgram, delete the module pointer in the destructor. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 30 --- backend/src/backend/gen_program.hpp | 4 +++- backend/src/backend/program.cpp | 48 +++-- backend/src/backend/program.h | 3 ++- backend/src/backend/program.hpp | 2 +- backend/src/llvm/llvm_to_gen.cpp| 16 - backend/src/llvm/llvm_to_gen.hpp| 2 +- src/cl_program.c| 2 +- 8 files changed, 59 insertions(+), 48 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 52db904..7019060 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -22,6 +22,17 @@ * \author Benjamin Segovia */ +#include "llvm/Config/config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DataLayout.h" +#else +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DataLayout.h" +#endif /* LLVM_VERSION_MINOR <= 2 */ + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -33,6 +44,8 @@ #include "ir/unit.hpp" #include "llvm/llvm_to_gen.hpp" +#include + #include #include #include @@ -72,7 +85,16 @@ namespace gbe { fclose(f); } - GenProgram::~GenProgram(void) {} + GenProgram::~GenProgram(void){ +if(module){ + delete (llvm::Module*)module; + module = NULL; +} + +if(llvm_ctx){ + llvm_ctx = NULL; +} + } /*! 
We must avoid spilling at all cost with Gen */ static const struct CodeGenStrategy { @@ -177,16 +199,18 @@ namespace gbe { static gbe_program genProgramNewFromLLVM(uint32_t deviceID, const char *fileName, + const void* module, + const void* llvm_ctx, size_t stringSize, char *err, size_t *errSize, int optLevel) { using namespace gbe; -GenProgram *program = GBE_NEW(GenProgram, deviceID); +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx); std::string error; // Try to compile the program -if (program->buildFromLLVMFile(fileName, error, optLevel) == false) { +if (program->buildFromLLVMFile(fileName, module, error, optLevel) == false) { if (err != NULL && errSize != NULL && stringSize > 0u) { const size_t msgSize = std::min(error.size(), stringSize-1u); std::memcpy(err, error.c_str(), msgSize); diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp index ea54b49..70794c9 100644 --- a/backend/src/backend/gen_program.hpp +++ b/backend/src/backend/gen_program.hpp @@ -58,7 +58,7 @@ namespace gbe { public: /*! Create an empty program */ -GenProgram(uint32_t deviceID) : deviceID(deviceID) {} +GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {} /*! Current device ID*/ uint32_t deviceID; /*! Destroy the program */ @@ -69,6 +69,8 @@ namespace gbe virtual Kernel *allocateKernel(const std::string &name) { return GBE_NEW(GenKernel, name); } +void* module; +void* llvm_ctx; /*! 
Use custom allocators */ GBE_CLASS(GenProgram); }; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index bdc7d34..f8e5d0f 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -34,6 +34,8 @@ #include "llvm/Config/config.h" #include "llvm/Support/Threading.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IR/LLVMContext.h" #include #include #include @@ -102,9 +104,9 @@ namespace gbe { BVAR(OCL_OUTPUT_GEN_IR, false); - bool Program::buildFromLLVMFile(const char *fileName, std::string &error, int optLevel) { + bool Program::buildFromLLVMFile(const char *fileName, const void* module, std::string &error, int optLevel) { ir::Unit *unit = new ir::Unit(); -if (llvmToGen(*unit, fileName, optLevel) == false) { +if (llvmToGen(*unit, fileName, module, optLevel) == false) { error = std::string(fileName) + " not found"; return false; } @@ -113,7 +115,7 @@ namespace gbe { if(!unit->getValid()) { delete unit; //clear unit unit = new ir::Unit(); - llvmToGen(*unit, fileName, 0); //suppose file exists and llvmToGen will not return false. + llvmToGen(*unit, fileName, module, 0); //suppose file exists and llvmToGen will not
[Beignet] [PATCH 2/4] add [opencl-1.2] API clCompileProgram.
From: Luo This API compiles a program's source for all the devices or a specific device in the OpenCL context associated with program. The pre-processor runs before the program sources are compiled. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 1 - backend/src/backend/program.cpp | 158 +++- backend/src/backend/program.h | 10 +++ src/cl_api.c| 41 ++ src/cl_program.c| 103 +++ src/cl_program.h| 8 +- 6 files changed, 318 insertions(+), 3 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 74b6fa1..7019060 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -92,7 +92,6 @@ namespace gbe { } if(llvm_ctx){ - delete (llvm::LLVMContext*)llvm_ctx; llvm_ctx = NULL; } } diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 90306cc..18895cd 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -801,7 +801,161 @@ namespace gbe { gbe_program p; // will delete the module and llvm_ctx in the destructor of GenProgram. 
llvm::Module * out_module; -llvm::LLVMContext* llvm_ctx = new llvm::LLVMContext; +llvm::LLVMContext* llvm_ctx = &llvm::getGlobalContext(); +if (buildModuleFromSource(clName.c_str(), &out_module, llvm_ctx, clOpt.c_str(), + stringSize, err, errSize)) { +// Now build the program from llvm + static std::mutex gbe_mutex; + gbe_mutex.lock(); + size_t clangErrSize = 0; + if (err != NULL) { +GBE_ASSERT(errSize != NULL); +stringSize -= *errSize; +err += *errSize; +clangErrSize = *errSize; + } + p = gbe_program_new_from_llvm(deviceID, NULL, out_module, llvm_ctx, stringSize, +err, errSize, optLevel); + if (err != NULL) +*errSize += clangErrSize; + gbe_mutex.unlock(); + if (OCL_OUTPUT_BUILD_LOG && options) +llvm::errs() << options; +} else + p = NULL; +remove(clName.c_str()); +return p; + } + + static gbe_program programCompileFromSource(uint32_t deviceID, + const char *source, + const char *temp_header_path, + size_t stringSize, + const char *options, + char *err, + size_t *errSize) + { +char clStr[] = "/tmp/XX.cl"; +int clFd = mkstemps(clStr, 3); +const std::string clName = std::string(clStr); +std::string clOpt; + +FILE *clFile = fdopen(clFd, "w"); +FATAL_IF(clFile == NULL, "Failed to open temporary file"); + +bool usePCH = OCL_USE_PCH; +bool findPCH = false; + +/* Because our header file is so big, we want to avoid recompile the header from + scratch. We use the PCH support of Clang to save the huge compiling time. + We just use the most general build opt to build the PCH header file, so if + user pass new build options here, the PCH can not pass the Clang's compitable + validating. Clang will do three kinds of compatible check: Language Option, + Target Option and Preprocessing Option. Other kinds of options such as the + CodeGen options will not affect the AST result, so no need to check. 
+ + According to OpenCL 1.1's spec, the CL build options: + -D name=definition + If the definition is not used in our header, it is compitable + + -cl-single-precision-constant + -cl-denorms-are-zero + -cl-std= + Language options, really affect. + + -cl-opt-disable + -cl-mad-enable + -cl-no-signed-zeros + -cl-unsafe-math-optimizations + -cl-finite-math-only + -cl-fast-relaxed-math + CodeGen options, not affect + + -Werror + -w + Our header should not block the compiling because of warning. + + So we just disable the PCH validation of Clang and do the judgement by ourself. */ + +if(options) { + char *p; + /* FIXME: Though we can disable the pch valid check, and load pch successfully, + but these language opts and pre-defined macro will still generate the diag msg + to the diag engine of the Clang and cause the Clang to report error. + We filter them all here to avoid these. */ + const char * incompatible_opts[] = { + "-cl-single-precision-constant", +//"-cl-denorms-are-zero", + "-cl-fast-relaxed-math", + "-cl-std=", + }; + const char * incompatible_defs[] = { + "GET_FLOAT_WORD", + "__NV_CL_C_VERSION", + "GEN7_SAMPLER_CLAMP_BORDER_W
[Beignet] [PATCH 3/4] add [opencl-1.2] API clLinkProgram.
From: Luo this API links a set of compiled program objects and libraries for all the devices or a specific device(s) in the OpenCL context and creates an executable. the llvm bitcode in the compiled program objects are linked together and built to Gen binary. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 116 backend/src/backend/program.cpp | 23 ++- backend/src/backend/program.h | 28 + src/cl_api.c| 33 ++ src/cl_program.c| 69 +++-- src/cl_program.h| 7 +++ 6 files changed, 257 insertions(+), 19 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 7019060..dc885d5 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -33,6 +33,9 @@ #include "llvm/IR/DataLayout.h" #endif /* LLVM_VERSION_MINOR <= 2 */ +#include "llvm/Linker.h" +#include "llvm/Transforms/Utils/Cloning.h" + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -222,6 +225,116 @@ namespace gbe { // Everything run fine return (gbe_program) program; } + + static gbe_program genProgramNewGenProgram(uint32_t deviceID, const void* module, const void* llvm_ctx) + { +using namespace gbe; +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx); +// Everything run fine +return (gbe_program) program; + } + + static void genProgramLinkFromLLVM(gbe_program dst_program, + gbe_program src_program, + size_tstringSize, + char *err, + size_t * errSize) + { +using namespace gbe; +std::string errMsg; +if(((GenProgram*)dst_program)->module == NULL){ + ((GenProgram*)dst_program)->module = llvm::CloneModule((llvm::Module*)((GenProgram*)src_program)->module); + errSize = 0; +}else{ + llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module; + llvm::GlobalVariable* gv = src->getNamedGlobal("PIo2"); + gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + gv = src->getNamedGlobal("npio2_hw"); + gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + 
gv = src->getNamedGlobal("two_over_pi"); + gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + gv = src->getNamedGlobal("atanhi"); + gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + gv = src->getNamedGlobal("atanlo"); + gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + + llvm::Function* fc = src->getFunction("barrier"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memset_p"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memset_g"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memset_l"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_gg"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_gp"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_gl"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_pg"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_pp"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_pl"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_lg"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_lp"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_ll"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + + llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module; + llvm::Linker::LinkModules( dst, + src, + llvm::Linker::PreserveSource, + &errMsg); + if (errMsg.c_str() != NULL) { +if (err != NULL && errSize != NULL && stringSize > 0u) { + if(errMsg.length() < stringSize ) +stringSize = errMsg.length(); + strcpy(err, errMsg.c_str()); + err[stringSize+1] = '\0'; +} + } + 
printf("%s\n", err); +} +// Everything run fine + } + + static void genProgramBuildFromLLVM(gbe_program program, + size_t stringSize, +
[Beignet] [PATCH 4/4] add [opencl-1.2] test case runtime_cl.
From: Luo --- utests/CMakeLists.txt | 1 + utests/runtime_compile_link.cpp | 127 2 files changed, 128 insertions(+) create mode 100644 utests/runtime_compile_link.cpp diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 5f0649f..c6d4098 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -153,6 +153,7 @@ set (utests_sources runtime_createcontext.cpp runtime_null_kernel_arg.cpp runtime_event.cpp + runtime_compile_link.cpp compiler_double.cpp compiler_double_2.cpp compiler_double_3.cpp diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp new file mode 100644 index 000..df55ab8 --- /dev/null +++ b/utests/runtime_compile_link.cpp @@ -0,0 +1,127 @@ +#include +#include +#include +#include "utest_helper.hpp" +#include "utest_file_map.hpp" + +#define BUFFERSIZE 32*1024 + +int init_program(const char* name, cl_context ctx, cl_program *pg ) +{ + cl_int err; + char* ker_path = cl_do_kiss_path(name, device); + + cl_file_map_t *fm = cl_file_map_new(); + err = cl_file_map_open(fm, ker_path); + if(err != CL_FILE_MAP_SUCCESS) +OCL_ASSERT(0); + const char *src = cl_file_map_begin(fm); + + *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err); + free(ker_path); + cl_file_map_delete(fm); + return 0; + +} + +void runtime_cl(void) +{ + + cl_int err; + + const char* header_file_name="multi2.h"; + cl_program foo_pg; + init_program(header_file_name, ctx, &foo_pg); + + const char* myinc_file_name="mydir/multi3.h"; + cl_program myinc_pg; + init_program(myinc_file_name, ctx, &myinc_pg); + + const char* file_name_A="multi_A.cl"; + cl_program program_A; + init_program(file_name_A, ctx, &program_A); + + cl_program input_headers[2] = { foo_pg, myinc_pg}; + const char * input_header_names[2] = { "multi2.h", "mydir/multi3.h"}; + + err = clCompileProgram(program_A, +0, NULL, // num_devices & device_list +NULL, // compile_options +2, // num_input_headers +input_headers, +input_header_names, +NULL, NULL); + + OCL_ASSERT(err==CL_SUCCESS); + const 
char* file_name_B="multi_B.cl"; + cl_program program_B; + init_program(file_name_B, ctx, &program_B); + + err = clCompileProgram(program_B, +0, NULL, // num_devices & device_list +NULL, // compile_options +2, // num_input_headers +input_headers, +input_header_names, +NULL, NULL); + + OCL_ASSERT(err==CL_SUCCESS); + cl_program input_programs[2] = { program_A, program_B}; + cl_program linked_program = clLinkProgram(ctx, 0, NULL, NULL, 2, input_programs, NULL, NULL, &err); + + + OCL_ASSERT(linked_program != NULL); + OCL_ASSERT(err == CL_SUCCESS); + + // link success, run this kernel. + + const size_t n = 16; + int64_t src1[n], src2[n]; + + src1[0] = (int64_t)1 << 63, src2[0] = 0x7FFFll; + src1[1] = (int64_t)1 << 63, src2[1] = ((int64_t)1 << 63) | 1; + src1[2] = -1ll, src2[2] = 0; + src1[3] = ((int64_t)123 << 32) | 0x7FFF, src2[3] = ((int64_t)123 << 32) | 0x8000; + src1[4] = 0x7FFFll, src2[4] = (int64_t)1 << 63; + src1[5] = ((int64_t)1 << 63) | 1, src2[5] = (int64_t)1 << 63; + src1[6] = 0, src2[6] = -1ll; + src1[7] = ((int64_t)123 << 32) | 0x8000, src2[7] = ((int64_t)123 << 32) | 0x7FFF; + for(size_t i=8; ihttp://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 1/4] [opencl-1.2] remove the code of saving the llvm bitcode to file, replace it with module pointer.
From: Luo Save the global LLVMContext and module pointer to GenProgram, delete the module pointer in the destructor. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 31 +--- backend/src/backend/gen_program.hpp | 4 +++- backend/src/backend/program.cpp | 47 ++--- backend/src/backend/program.h | 3 ++- backend/src/backend/program.hpp | 2 +- backend/src/llvm/llvm_to_gen.cpp| 16 - backend/src/llvm/llvm_to_gen.hpp| 2 +- src/cl_program.c| 2 +- 8 files changed, 59 insertions(+), 48 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 52db904..74b6fa1 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -22,6 +22,17 @@ * \author Benjamin Segovia */ +#include "llvm/Config/config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DataLayout.h" +#else +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DataLayout.h" +#endif /* LLVM_VERSION_MINOR <= 2 */ + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -33,6 +44,8 @@ #include "ir/unit.hpp" #include "llvm/llvm_to_gen.hpp" +#include + #include #include #include @@ -72,7 +85,17 @@ namespace gbe { fclose(f); } - GenProgram::~GenProgram(void) {} + GenProgram::~GenProgram(void){ +if(module){ + delete (llvm::Module*)module; + module = NULL; +} + +if(llvm_ctx){ + delete (llvm::LLVMContext*)llvm_ctx; + llvm_ctx = NULL; +} + } /*! 
We must avoid spilling at all cost with Gen */ static const struct CodeGenStrategy { @@ -177,16 +200,18 @@ namespace gbe { static gbe_program genProgramNewFromLLVM(uint32_t deviceID, const char *fileName, + const void* module, + const void* llvm_ctx, size_t stringSize, char *err, size_t *errSize, int optLevel) { using namespace gbe; -GenProgram *program = GBE_NEW(GenProgram, deviceID); +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx); std::string error; // Try to compile the program -if (program->buildFromLLVMFile(fileName, error, optLevel) == false) { +if (program->buildFromLLVMFile(fileName, module, error, optLevel) == false) { if (err != NULL && errSize != NULL && stringSize > 0u) { const size_t msgSize = std::min(error.size(), stringSize-1u); std::memcpy(err, error.c_str(), msgSize); diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp index ea54b49..70794c9 100644 --- a/backend/src/backend/gen_program.hpp +++ b/backend/src/backend/gen_program.hpp @@ -58,7 +58,7 @@ namespace gbe { public: /*! Create an empty program */ -GenProgram(uint32_t deviceID) : deviceID(deviceID) {} +GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {} /*! Current device ID*/ uint32_t deviceID; /*! Destroy the program */ @@ -69,6 +69,8 @@ namespace gbe virtual Kernel *allocateKernel(const std::string &name) { return GBE_NEW(GenKernel, name); } +void* module; +void* llvm_ctx; /*! 
Use custom allocators */ GBE_CLASS(GenProgram); }; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index bdc7d34..90306cc 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -34,6 +34,8 @@ #include "llvm/Config/config.h" #include "llvm/Support/Threading.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IR/LLVMContext.h" #include #include #include @@ -102,9 +104,9 @@ namespace gbe { BVAR(OCL_OUTPUT_GEN_IR, false); - bool Program::buildFromLLVMFile(const char *fileName, std::string &error, int optLevel) { + bool Program::buildFromLLVMFile(const char *fileName, const void* module, std::string &error, int optLevel) { ir::Unit *unit = new ir::Unit(); -if (llvmToGen(*unit, fileName, optLevel) == false) { +if (llvmToGen(*unit, fileName, module, optLevel) == false) { error = std::string(fileName) + " not found"; return false; } @@ -113,7 +115,7 @@ namespace gbe { if(!unit->getValid()) { delete unit; //clear unit unit = new ir::Unit(); - llvmToGen(*unit, fileName, 0); //suppose file exists and llvmToGen will not return false. + llvmToGen(*unit, fileName, module, 0);
[Beignet] [PATCH V2] remove the code of saving the llvm bitcode to file, replace it with llvm::Module
From: Luo Save the global LLVMContext and module pointer to GenProgram, delete the module pointer in the destructor. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 33 +++-- backend/src/backend/gen_program.hpp | 4 ++- backend/src/backend/program.cpp | 59 - backend/src/backend/program.h | 3 +- backend/src/backend/program.hpp | 2 +- backend/src/llvm/llvm_to_gen.cpp| 16 ++ backend/src/llvm/llvm_to_gen.hpp| 2 +- src/cl_program.c| 2 +- 8 files changed, 73 insertions(+), 48 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index d2e95d4..33f2ed6 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -22,6 +22,17 @@ * \author Benjamin Segovia */ +#include "llvm/Config/config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DataLayout.h" +#else +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DataLayout.h" +#endif /* LLVM_VERSION_MINOR <= 2 */ + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -33,6 +44,8 @@ #include "ir/unit.hpp" #include "llvm/llvm_to_gen.hpp" +#include + #include #include #include @@ -74,7 +87,19 @@ namespace gbe { #endif } - GenProgram::~GenProgram(void) {} + GenProgram::~GenProgram(void){ +#ifdef GBE_COMPILER_AVAILABLE +if(module){ + delete (llvm::Module*)module; + module = NULL; +} + +if(llvm_ctx){ + delete (llvm::LLVMContext*)llvm_ctx; + llvm_ctx = NULL; +} +#endif + } /*! 
We must avoid spilling at all cost with Gen */ static const struct CodeGenStrategy { @@ -182,17 +207,19 @@ namespace gbe { static gbe_program genProgramNewFromLLVM(uint32_t deviceID, const char *fileName, + const void* module, + const void* llvm_ctx, size_t stringSize, char *err, size_t *errSize, int optLevel) { using namespace gbe; -GenProgram *program = GBE_NEW(GenProgram, deviceID); +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx); #ifdef GBE_COMPILER_AVAILABLE std::string error; // Try to compile the program -if (program->buildFromLLVMFile(fileName, error, optLevel) == false) { +if (program->buildFromLLVMFile(fileName, module, error, optLevel) == false) { if (err != NULL && errSize != NULL && stringSize > 0u) { const size_t msgSize = std::min(error.size(), stringSize-1u); std::memcpy(err, error.c_str(), msgSize); diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp index ea54b49..70794c9 100644 --- a/backend/src/backend/gen_program.hpp +++ b/backend/src/backend/gen_program.hpp @@ -58,7 +58,7 @@ namespace gbe { public: /*! Create an empty program */ -GenProgram(uint32_t deviceID) : deviceID(deviceID) {} +GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {} /*! Current device ID*/ uint32_t deviceID; /*! Destroy the program */ @@ -69,6 +69,8 @@ namespace gbe virtual Kernel *allocateKernel(const std::string &name) { return GBE_NEW(GenKernel, name); } +void* module; +void* llvm_ctx; /*! 
Use custom allocators */ GBE_CLASS(GenProgram); }; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 949aeb4..26d9454 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -34,6 +34,8 @@ #include "llvm/Config/config.h" #include "llvm/Support/Threading.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IR/LLVMContext.h" #include #include #include @@ -103,9 +105,13 @@ namespace gbe { #ifdef GBE_COMPILER_AVAILABLE BVAR(OCL_OUTPUT_GEN_IR, false); - bool Program::buildFromLLVMFile(const char *fileName, std::string &error, int optLevel) { + bool Program::buildFromLLVMFile(const char *fileName, const void* module, std::string &error, int optLevel) { ir::Unit *unit = new ir::Unit(); -if (llvmToGen(*unit, fileName, optLevel) == false) { +llvm::Module * cloned_module = NULL; +if(module){ + cloned_module = llvm::CloneModule((llvm::Module*)module); +} +if (llvmToGen(*unit, fileName, module, optLevel) == false) { error = std::string(fileName) + " not found"; return false; } @@ -114,11 +120,18 @@ namespace gbe { if(!unit->getValid
[Beignet] [PATCH V2 2/3] add [opencl-1.2] API clLinkProgram.
From: Luo this API links a set of compiled program objects and libraries for all the devices or a specific device(s) in the OpenCL context and creates an executable. the llvm bitcode in the compiled program objects are linked together and built to Gen binary. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 120 backend/src/backend/program.cpp | 33 +++--- backend/src/backend/program.h | 28 + src/cl_api.c| 33 ++ src/cl_gbe_loader.cpp | 12 src/cl_program.c| 69 - src/cl_program.h| 7 +++ 7 files changed, 277 insertions(+), 25 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 33f2ed6..d7cb898 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -33,6 +33,9 @@ #include "llvm/IR/DataLayout.h" #endif /* LLVM_VERSION_MINOR <= 2 */ +#include "llvm/Linker.h" +#include "llvm/Transforms/Utils/Cloning.h" + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -232,6 +235,120 @@ namespace gbe { // Everything run fine return (gbe_program) program; } + + static gbe_program genProgramNewGenProgram(uint32_t deviceID, const void* module, const void* llvm_ctx) + { +using namespace gbe; +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx); +// Everything run fine +return (gbe_program) program; + } + + static void genProgramLinkFromLLVM(gbe_program dst_program, + gbe_program src_program, + size_tstringSize, + char *err, + size_t * errSize) + { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string errMsg; +if(((GenProgram*)dst_program)->module == NULL){ + ((GenProgram*)dst_program)->module = llvm::CloneModule((llvm::Module*)((GenProgram*)src_program)->module); + errSize = 0; +}else{ + llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module; + llvm::GlobalVariable* gv = src->getNamedGlobal("PIo2"); + gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + gv = src->getNamedGlobal("npio2_hw"); + 
gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + gv = src->getNamedGlobal("two_over_pi"); + gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + gv = src->getNamedGlobal("atanhi"); + gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + gv = src->getNamedGlobal("atanlo"); + gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + + llvm::Function* fc = src->getFunction("barrier"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memset_p"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memset_g"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memset_l"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_gg"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_gp"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_gl"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_pg"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_pp"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_pl"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_lg"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_lp"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + fc = src->getFunction("__gen_memcpy_ll"); + fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + + llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module; + llvm::Linker::LinkModules( dst, + src, + llvm::Linker::PreserveSource, + &errMsg); + if (errMsg.c_str() != NULL) { +if (err != NULL && errSize != NULL && stringSize > 0u) { + if(errMsg.length() < stringSize ) +stringSize = errMsg.length(); + strcpy(err, 
errMsg.c_str()); + err[stringSize+1] = '\0'; +} + } + printf("%s\n", err); +} +// Everything run fine +#endif + } + + static void genProgramBuildFromLLVM(gbe_prog
[Beignet] [PATCH V2 3/3] add [opencl-1.2] test case runtime_cl.
From: Luo Signed-off-by: Luo --- kernels/multi2.h| 1 + kernels/multi_A.cl | 13 kernels/multi_B.cl | 9 +++ kernels/mydir/multi3.h | 4 ++ utests/CMakeLists.txt | 1 + utests/runtime_compile_link.cpp | 127 6 files changed, 155 insertions(+) create mode 100644 kernels/multi2.h create mode 100644 kernels/multi_A.cl create mode 100644 kernels/multi_B.cl create mode 100644 kernels/mydir/multi3.h create mode 100644 utests/runtime_compile_link.cpp diff --git a/kernels/multi2.h b/kernels/multi2.h new file mode 100644 index 000..ae2c56e --- /dev/null +++ b/kernels/multi2.h @@ -0,0 +1 @@ +int comp_long(long x, long y); diff --git a/kernels/multi_A.cl b/kernels/multi_A.cl new file mode 100644 index 000..9282b8d --- /dev/null +++ b/kernels/multi_A.cl @@ -0,0 +1,13 @@ +#include "multi2.h" +#include "mydir/multi3.h" + +int comp_long(long x, long y) +{ + return x < y ; +} + +kernel void multi_A(global long *src1, global long *src2, global long *dst) { + int i = get_global_id(0); + int j = comp_long(src1[i], src2[i]); + dst[i] = j ? 3 : 4; +} diff --git a/kernels/multi_B.cl b/kernels/multi_B.cl new file mode 100644 index 000..de147eb --- /dev/null +++ b/kernels/multi_B.cl @@ -0,0 +1,9 @@ +#include "multi2.h" +#include "mydir/multi3.h" + +kernel void multi_B(global long *src1, global long *src2, global long *dst) { + int i = get_global_id(0); + int j = comp_long(src1[i], src2[i]); + dst[i] = j ? 
3 : 4; + int k = greater(src1[i], src2[i]); +} diff --git a/kernels/mydir/multi3.h b/kernels/mydir/multi3.h new file mode 100644 index 000..4011278 --- /dev/null +++ b/kernels/mydir/multi3.h @@ -0,0 +1,4 @@ +inline int greater(long x, long y) +{ + return x > y ; +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 698c9ff..bee3e8f 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -157,6 +157,7 @@ set (utests_sources runtime_event.cpp runtime_barrier_list.cpp runtime_marker_list.cpp + runtime_compile_link.cpp compiler_double.cpp compiler_double_2.cpp compiler_double_3.cpp diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp new file mode 100644 index 000..df55ab8 --- /dev/null +++ b/utests/runtime_compile_link.cpp @@ -0,0 +1,127 @@ +#include +#include +#include +#include "utest_helper.hpp" +#include "utest_file_map.hpp" + +#define BUFFERSIZE 32*1024 + +int init_program(const char* name, cl_context ctx, cl_program *pg ) +{ + cl_int err; + char* ker_path = cl_do_kiss_path(name, device); + + cl_file_map_t *fm = cl_file_map_new(); + err = cl_file_map_open(fm, ker_path); + if(err != CL_FILE_MAP_SUCCESS) +OCL_ASSERT(0); + const char *src = cl_file_map_begin(fm); + + *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err); + free(ker_path); + cl_file_map_delete(fm); + return 0; + +} + +void runtime_cl(void) +{ + + cl_int err; + + const char* header_file_name="multi2.h"; + cl_program foo_pg; + init_program(header_file_name, ctx, &foo_pg); + + const char* myinc_file_name="mydir/multi3.h"; + cl_program myinc_pg; + init_program(myinc_file_name, ctx, &myinc_pg); + + const char* file_name_A="multi_A.cl"; + cl_program program_A; + init_program(file_name_A, ctx, &program_A); + + cl_program input_headers[2] = { foo_pg, myinc_pg}; + const char * input_header_names[2] = { "multi2.h", "mydir/multi3.h"}; + + err = clCompileProgram(program_A, +0, NULL, // num_devices & device_list +NULL, // compile_options +2, // 
num_input_headers +input_headers, +input_header_names, +NULL, NULL); + + OCL_ASSERT(err==CL_SUCCESS); + const char* file_name_B="multi_B.cl"; + cl_program program_B; + init_program(file_name_B, ctx, &program_B); + + err = clCompileProgram(program_B, +0, NULL, // num_devices & device_list +NULL, // compile_options +2, // num_input_headers +input_headers, +input_header_names, +NULL, NULL); + + OCL_ASSERT(err==CL_SUCCESS); + cl_program input_programs[2] = { program_A, program_B}; + cl_program linked_program = clLinkProgram(ctx, 0, NULL, NULL, 2, input_programs, NULL, NULL, &err); + + + OCL_ASSERT(linked_program != NULL); + OCL_ASSERT(err == CL_SUCCESS); + + // link success, run this kernel. + + const size_t n = 16; + int64_t src1[n], src2[n]; + + src1[0] = (int64_t)1 << 63, src2[0] = 0x7FFFll; + src1[1] = (int64_t)1 << 63, src2[1] = ((int64_t)1 << 63) | 1; + src1[2] = -1ll, src2[2] = 0; + src1[3] = ((int64_t)123 << 32) | 0x7FFF, src2[3] = ((int64_t)123 << 32) | 0x8000; +
[Beignet] [PATCH V2 0/3] opencl-1.2 compile/link implementation.
From: Luo 1. This patchset depends on the patch on master titled "remove the code of saving the llvm bitcode to file, replace it with llvm::Module"; 2. The clBuildProgram path is independent of clCompileProgram/clLinkProgram: clBuildProgram allocates a new LLVMContext, while compile/link use the global LLVMContext. Luo (3): add [opencl-1.2] API clCompileProgram. add [opencl-1.2] API clLinkProgram. add [opencl-1.2] test case runtime_cl. backend/src/backend/gen_program.cpp | 120 backend/src/backend/program.cpp | 178 backend/src/backend/program.h | 38 kernels/multi2.h| 1 + kernels/multi_A.cl | 13 +++ kernels/multi_B.cl | 9 ++ kernels/mydir/multi3.h | 4 + src/cl_api.c| 74 +++ src/cl_gbe_loader.cpp | 16 src/cl_program.c| 138 src/cl_program.h| 15 ++- utests/CMakeLists.txt | 1 + utests/runtime_compile_link.cpp | 127 + 13 files changed, 733 insertions(+), 1 deletion(-) create mode 100644 kernels/multi2.h create mode 100644 kernels/multi_A.cl create mode 100644 kernels/multi_B.cl create mode 100644 kernels/mydir/multi3.h create mode 100644 utests/runtime_compile_link.cpp -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH V2 1/3] add [opencl-1.2] API clCompileProgram.
From: Luo This API compiles a program's source for all the devices or a specific device in the OpenCL context associated with program. The pre-processor runs before the program sources are compiled. Signed-off-by: Luo --- backend/src/backend/program.cpp | 161 backend/src/backend/program.h | 10 +++ src/cl_api.c| 41 ++ src/cl_gbe_loader.cpp | 4 + src/cl_program.c| 103 + src/cl_program.h| 8 +- 6 files changed, 326 insertions(+), 1 deletion(-) diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index fab6bce..5af66fb 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -842,6 +842,165 @@ namespace gbe { } #endif +#ifdef GBE_COMPILER_AVAILABLE + static gbe_program programCompileFromSource(uint32_t deviceID, + const char *source, + const char *temp_header_path, + size_t stringSize, + const char *options, + char *err, + size_t *errSize) + { +char clStr[] = "/tmp/XX.cl"; +int clFd = mkstemps(clStr, 3); +const std::string clName = std::string(clStr); +std::string clOpt; + +FILE *clFile = fdopen(clFd, "w"); +FATAL_IF(clFile == NULL, "Failed to open temporary file"); + +bool usePCH = OCL_USE_PCH; +bool findPCH = false; + +/* Because our header file is so big, we want to avoid recompile the header from + scratch. We use the PCH support of Clang to save the huge compiling time. + We just use the most general build opt to build the PCH header file, so if + user pass new build options here, the PCH can not pass the Clang's compitable + validating. Clang will do three kinds of compatible check: Language Option, + Target Option and Preprocessing Option. Other kinds of options such as the + CodeGen options will not affect the AST result, so no need to check. + + According to OpenCL 1.1's spec, the CL build options: + -D name=definition + If the definition is not used in our header, it is compitable + + -cl-single-precision-constant + -cl-denorms-are-zero + -cl-std= + Language options, really affect. 
+ + -cl-opt-disable + -cl-mad-enable + -cl-no-signed-zeros + -cl-unsafe-math-optimizations + -cl-finite-math-only + -cl-fast-relaxed-math + CodeGen options, not affect + + -Werror + -w + Our header should not block the compiling because of warning. + + So we just disable the PCH validation of Clang and do the judgement by ourself. */ + +if(options) { + char *p; + /* FIXME: Though we can disable the pch valid check, and load pch successfully, + but these language opts and pre-defined macro will still generate the diag msg + to the diag engine of the Clang and cause the Clang to report error. + We filter them all here to avoid these. */ + const char * incompatible_opts[] = { + "-cl-single-precision-constant", +//"-cl-denorms-are-zero", + "-cl-fast-relaxed-math", + "-cl-std=", + }; + const char * incompatible_defs[] = { + "GET_FLOAT_WORD", + "__NV_CL_C_VERSION", + "GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND" + }; + + for (unsigned int i = 0; i < sizeof(incompatible_opts)/sizeof(char *); i++ ) { +p = strstr(const_cast(options), incompatible_opts[i]); +if (p) { + usePCH = false; + break; +} + } + + if (usePCH) { +for (unsigned int i = 0; i < sizeof(incompatible_defs)/sizeof(char *); i++ ) { + p = strstr(const_cast(options), incompatible_defs[i]); + if (p) { +usePCH = false; +break; + } +} + } + + + clOpt += options; +} + +std::string dirs = OCL_PCH_PATH; +std::istringstream idirs(dirs); +std::string pchFileName; + +while (getline(idirs, pchFileName, ':')) { + if(access(pchFileName.c_str(), R_OK) == 0) { +findPCH = true; +break; + } +} + +if (usePCH && findPCH) { + clOpt += " -include-pch "; + clOpt += pchFileName; + clOpt += " "; +} else + fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 1, clFile); + +if (!OCL_STRICT_CONFORMANCE) { +fwrite(ocl_mathfunc_fastpath_str.c_str(), strlen(ocl_mathfunc_fastpath_str.c_str()), 1, clFile); +} + +//for clCompilerProgram usage. 
+if(temp_header_path){ + clOpt += " -I "; + clOpt += temp_header_path; + clOpt += " "; +} + +// reset the file number in case we have inse
[Beignet] [PATCH V3 2/3] add [opencl-1.2] API clLinkProgram.
From: Luo this API links a set of compiled program objects and libraries for all the devices or a specific device(s) in the OpenCL context and creates an executable. the llvm bitcode in the compiled program objects are linked together and built to Gen binary. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 95 + backend/src/backend/program.cpp | 28 +-- backend/src/backend/program.h | 28 +++ src/cl_api.c| 33 + src/cl_gbe_loader.cpp | 12 + src/cl_program.c| 69 --- src/cl_program.h| 7 +++ 7 files changed, 251 insertions(+), 21 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 33f2ed6..bb1b4df 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -33,6 +33,9 @@ #include "llvm/IR/DataLayout.h" #endif /* LLVM_VERSION_MINOR <= 2 */ +#include "llvm/Linker.h" +#include "llvm/Transforms/Utils/Cloning.h" + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -51,6 +54,7 @@ #include #include #include +#include namespace gbe { @@ -232,6 +236,94 @@ namespace gbe { // Everything run fine return (gbe_program) program; } + + static gbe_program genProgramNewGenProgram(uint32_t deviceID, const void* module, const void* llvm_ctx) + { +using namespace gbe; +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx); +// Everything run fine +return (gbe_program) program; + } + + static void genProgramLinkFromLLVM(gbe_program dst_program, + gbe_program src_program, + size_tstringSize, + char *err, + size_t * errSize) + { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string errMsg; +if(((GenProgram*)dst_program)->module == NULL){ + ((GenProgram*)dst_program)->module = llvm::CloneModule((llvm::Module*)((GenProgram*)src_program)->module); + errSize = 0; +}else{ + //set the global variables and functions to link once to fix redefine. 
+ llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module; + for (llvm::Module::global_iterator I = src->global_begin(), E = src->global_end(); I != E; ++I) { +I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + } + + for (llvm::Module::iterator I = src->begin(), E = src->end(); I != E; ++I) { +I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + } + + llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module; + llvm::Linker::LinkModules( dst, + src, + llvm::Linker::PreserveSource, + &errMsg); + if (errMsg.c_str() != NULL) { +if (err != NULL && errSize != NULL && stringSize > 0u) { + if(errMsg.length() < stringSize ) +stringSize = errMsg.length(); + strcpy(err, errMsg.c_str()); + err[stringSize+1] = '\0'; +} + } + printf("%s\n", err); +} +// Everything run fine +#endif + } + + static void genProgramBuildFromLLVM(gbe_program program, + size_t stringSize, + char *err, + size_t *errSize, + const char * options) + { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string error; + +int optLevel = 1; + +if(options) { + char *p; + p = strstr(const_cast(options), "-cl-opt-disable"); + if (p) +optLevel = 0; +} + +GenProgram* p = (GenProgram*) program; +// Try to compile the program +static std::mutex gbe_mutex; +gbe_mutex.lock(); +llvm::Module* module = (llvm::Module*)p->module; + +if (p->buildFromLLVMFile(NULL, module, error, optLevel) == false) { + if (err != NULL && errSize != NULL && stringSize > 0u) { +const size_t msgSize = std::min(error.size(), stringSize-1u); +std::memcpy(err, error.c_str(), msgSize); +*errSize = error.size(); + } + gbe_mutex.unlock(); + GBE_DELETE(p); +} +#endif + } + } /* namespace gbe */ void genSetupCallBacks(void) @@ -239,4 +331,7 @@ void genSetupCallBacks(void) gbe_program_new_from_binary = gbe::genProgramNewFromBinary; gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary; gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM; + gbe_program_new_gen_program = 
gbe::genProgramNewGenProgram; + gbe_program_link_from_llvm = gbe::genProgramLinkFromLLVM; + gbe_progr
[Beignet] [PATCH V3 1/3] add [opencl-1.2] API clCompileProgram.
From: Luo This API compiles a program's source for all the devices or a specific device in the OpenCL context associated with program. The pre-processor runs before the program sources are compiled. Signed-off-by: Luo --- backend/src/backend/program.cpp | 162 backend/src/backend/program.h | 10 +++ src/cl_api.c| 41 ++ src/cl_gbe_loader.cpp | 4 + src/cl_program.c| 109 +++ src/cl_program.h| 8 +- 6 files changed, 333 insertions(+), 1 deletion(-) diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index fab6bce..7cfc07e 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -842,6 +842,166 @@ namespace gbe { } #endif +#ifdef GBE_COMPILER_AVAILABLE + static gbe_program programCompileFromSource(uint32_t deviceID, + const char *source, + const char *temp_header_path, + size_t stringSize, + const char *options, + char *err, + size_t *errSize) + { +char clStr[] = "/tmp/XX.cl"; +int clFd = mkstemps(clStr, 3); +const std::string clName = std::string(clStr); +std::string clOpt; + +FILE *clFile = fdopen(clFd, "w"); +FATAL_IF(clFile == NULL, "Failed to open temporary file"); + +bool usePCH = OCL_USE_PCH; +bool findPCH = false; + +/* Because our header file is so big, we want to avoid recompile the header from + scratch. We use the PCH support of Clang to save the huge compiling time. + We just use the most general build opt to build the PCH header file, so if + user pass new build options here, the PCH can not pass the Clang's compitable + validating. Clang will do three kinds of compatible check: Language Option, + Target Option and Preprocessing Option. Other kinds of options such as the + CodeGen options will not affect the AST result, so no need to check. + + According to OpenCL 1.1's spec, the CL build options: + -D name=definition + If the definition is not used in our header, it is compitable + + -cl-single-precision-constant + -cl-denorms-are-zero + -cl-std= + Language options, really affect. 
+ + -cl-opt-disable + -cl-mad-enable + -cl-no-signed-zeros + -cl-unsafe-math-optimizations + -cl-finite-math-only + -cl-fast-relaxed-math + CodeGen options, not affect + + -Werror + -w + Our header should not block the compiling because of warning. + + So we just disable the PCH validation of Clang and do the judgement by ourself. */ + +if(options) { + char *p; + /* FIXME: Though we can disable the pch valid check, and load pch successfully, + but these language opts and pre-defined macro will still generate the diag msg + to the diag engine of the Clang and cause the Clang to report error. + We filter them all here to avoid these. */ + const char * incompatible_opts[] = { + "-cl-single-precision-constant", +//"-cl-denorms-are-zero", + "-cl-fast-relaxed-math", + "-cl-std=", + }; + const char * incompatible_defs[] = { + "GET_FLOAT_WORD", + "__NV_CL_C_VERSION", + "GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND" + }; + + for (unsigned int i = 0; i < sizeof(incompatible_opts)/sizeof(char *); i++ ) { +p = strstr(const_cast(options), incompatible_opts[i]); +if (p) { + usePCH = false; + break; +} + } + + if (usePCH) { +for (unsigned int i = 0; i < sizeof(incompatible_defs)/sizeof(char *); i++ ) { + p = strstr(const_cast(options), incompatible_defs[i]); + if (p) { +usePCH = false; +break; + } +} + } + + + clOpt += options; +} + +std::string dirs = OCL_PCH_PATH; +std::istringstream idirs(dirs); +std::string pchFileName; + +while (getline(idirs, pchFileName, ':')) { + if(access(pchFileName.c_str(), R_OK) == 0) { +findPCH = true; +break; + } +} + +if (usePCH && findPCH) { + clOpt += " -include-pch "; + clOpt += pchFileName; + clOpt += " "; +} else + fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 1, clFile); + +if (!OCL_STRICT_CONFORMANCE) { +fwrite(ocl_mathfunc_fastpath_str.c_str(), strlen(ocl_mathfunc_fastpath_str.c_str()), 1, clFile); +} + +//for clCompilerProgram usage. 
+if(temp_header_path){ + clOpt += " -I "; + clOpt += temp_header_path; + clOpt += " "; +} + +// reset the file number in case we have in
[Beignet] [PATCH V3 3/3] add [opencl-1.2] test case runtime_compile_link.
From: Luo Signed-off-by: Luo --- kernels/multi2.h| 1 + kernels/multi_A.cl | 13 kernels/multi_B.cl | 9 +++ kernels/mydir/multi3.h | 4 ++ utests/CMakeLists.txt | 1 + utests/runtime_compile_link.cpp | 127 6 files changed, 155 insertions(+) create mode 100644 kernels/multi2.h create mode 100644 kernels/multi_A.cl create mode 100644 kernels/multi_B.cl create mode 100644 kernels/mydir/multi3.h create mode 100644 utests/runtime_compile_link.cpp diff --git a/kernels/multi2.h b/kernels/multi2.h new file mode 100644 index 000..ae2c56e --- /dev/null +++ b/kernels/multi2.h @@ -0,0 +1 @@ +int comp_long(long x, long y); diff --git a/kernels/multi_A.cl b/kernels/multi_A.cl new file mode 100644 index 000..9282b8d --- /dev/null +++ b/kernels/multi_A.cl @@ -0,0 +1,13 @@ +#include "multi2.h" +#include "mydir/multi3.h" + +int comp_long(long x, long y) +{ + return x < y ; +} + +kernel void multi_A(global long *src1, global long *src2, global long *dst) { + int i = get_global_id(0); + int j = comp_long(src1[i], src2[i]); + dst[i] = j ? 3 : 4; +} diff --git a/kernels/multi_B.cl b/kernels/multi_B.cl new file mode 100644 index 000..de147eb --- /dev/null +++ b/kernels/multi_B.cl @@ -0,0 +1,9 @@ +#include "multi2.h" +#include "mydir/multi3.h" + +kernel void multi_B(global long *src1, global long *src2, global long *dst) { + int i = get_global_id(0); + int j = comp_long(src1[i], src2[i]); + dst[i] = j ? 
3 : 4; + int k = greater(src1[i], src2[i]); +} diff --git a/kernels/mydir/multi3.h b/kernels/mydir/multi3.h new file mode 100644 index 000..4011278 --- /dev/null +++ b/kernels/mydir/multi3.h @@ -0,0 +1,4 @@ +inline int greater(long x, long y) +{ + return x > y ; +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 698c9ff..bee3e8f 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -157,6 +157,7 @@ set (utests_sources runtime_event.cpp runtime_barrier_list.cpp runtime_marker_list.cpp + runtime_compile_link.cpp compiler_double.cpp compiler_double_2.cpp compiler_double_3.cpp diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp new file mode 100644 index 000..17fe413 --- /dev/null +++ b/utests/runtime_compile_link.cpp @@ -0,0 +1,127 @@ +#include +#include +#include +#include "utest_helper.hpp" +#include "utest_file_map.hpp" + +#define BUFFERSIZE 32*1024 + +int init_program(const char* name, cl_context ctx, cl_program *pg ) +{ + cl_int err; + char* ker_path = cl_do_kiss_path(name, device); + + cl_file_map_t *fm = cl_file_map_new(); + err = cl_file_map_open(fm, ker_path); + if(err != CL_FILE_MAP_SUCCESS) +OCL_ASSERT(0); + const char *src = cl_file_map_begin(fm); + + *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err); + free(ker_path); + cl_file_map_delete(fm); + return 0; + +} + +void runtime_compile_link(void) +{ + + cl_int err; + + const char* header_file_name="multi2.h"; + cl_program foo_pg; + init_program(header_file_name, ctx, &foo_pg); + + const char* myinc_file_name="mydir/multi3.h"; + cl_program myinc_pg; + init_program(myinc_file_name, ctx, &myinc_pg); + + const char* file_name_A="multi_A.cl"; + cl_program program_A; + init_program(file_name_A, ctx, &program_A); + + cl_program input_headers[2] = { foo_pg, myinc_pg}; + const char * input_header_names[2] = { "multi2.h", "mydir/multi3.h"}; + + err = clCompileProgram(program_A, +0, NULL, // num_devices & device_list +NULL, // compile_options +2, // 
num_input_headers +input_headers, +input_header_names, +NULL, NULL); + + OCL_ASSERT(err==CL_SUCCESS); + const char* file_name_B="multi_B.cl"; + cl_program program_B; + init_program(file_name_B, ctx, &program_B); + + err = clCompileProgram(program_B, +0, NULL, // num_devices & device_list +NULL, // compile_options +2, // num_input_headers +input_headers, +input_header_names, +NULL, NULL); + + OCL_ASSERT(err==CL_SUCCESS); + cl_program input_programs[2] = { program_A, program_B}; + cl_program linked_program = clLinkProgram(ctx, 0, NULL, NULL, 2, input_programs, NULL, NULL, &err); + + + OCL_ASSERT(linked_program != NULL); + OCL_ASSERT(err == CL_SUCCESS); + + // link success, run this kernel. + + const size_t n = 16; + int64_t src1[n], src2[n]; + + src1[0] = (int64_t)1 << 63, src2[0] = 0x7FFFll; + src1[1] = (int64_t)1 << 63, src2[1] = ((int64_t)1 << 63) | 1; + src1[2] = -1ll, src2[2] = 0; + src1[3] = ((int64_t)123 << 32) | 0x7FFF, src2[3] = ((int64_t)123 << 32) | 0x8
[Beignet] [PATCH V2 0/3] opencl-1.2 compile/link implementation.
From: Luo 1. This patchset depends on the patch on master titled "remove the code of saving the llvm bitcode to file, replace it with llvm::Module"; 2. The clBuildProgram path is independent of clCompileProgram/clLinkProgram: clBuildProgram allocates a new LLVMContext, while compile/link use the global LLVMContext. Luo (3): add [opencl-1.2] API clCompileProgram. add [opencl-1.2] API clLinkProgram. add [opencl-1.2] test case runtime_cl. backend/src/backend/gen_program.cpp | 120 backend/src/backend/program.cpp | 178 backend/src/backend/program.h | 38 kernels/multi2.h| 1 + kernels/multi_A.cl | 13 +++ kernels/multi_B.cl | 9 ++ kernels/mydir/multi3.h | 4 + src/cl_api.c| 74 +++ src/cl_gbe_loader.cpp | 16 src/cl_program.c| 138 src/cl_program.h| 15 ++- utests/CMakeLists.txt | 1 + utests/runtime_compile_link.cpp | 127 + 13 files changed, 733 insertions(+), 1 deletion(-) create mode 100644 kernels/multi2.h create mode 100644 kernels/multi_A.cl create mode 100644 kernels/multi_B.cl create mode 100644 kernels/mydir/multi3.h create mode 100644 utests/runtime_compile_link.cpp -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] add [opencl-1.2] clUnloadPlatformCompiler implementation
From: Luo just an empty hook. Signed-off-by: Luo --- src/cl_api.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/cl_api.c b/src/cl_api.c index 8598088..824a952 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -939,6 +939,12 @@ clUnloadCompiler(void) } cl_int +clUnloadPlatformCompiler(cl_platform_id platform) +{ + return CL_SUCCESS; +} + +cl_int clGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH V2] add [opencl-1.2] API clCreateSubDevices.
From: Luo creates an array of sub-devices that each reference a non-intersecting set of compute units within in_device, according to a partition scheme given by properties. --- src/cl_api.c | 10 -- src/cl_device_id.c | 6 ++ src/cl_device_id.h | 7 +++ src/cl_gt_device.h | 7 ++- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 8598088..8264970 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -242,8 +242,14 @@ clCreateSubDevices(cl_device_id in_device, cl_device_id * out_devices, cl_uint *num_devices_ret) { - NOT_IMPLEMENTED; - return 0; + /* Check parameter consistency */ + if (UNLIKELY(out_devices == NULL && num_devices_ret == NULL)) +return CL_INVALID_VALUE; + if (UNLIKELY(in_device == NULL && properties == NULL)) +return CL_INVALID_VALUE; + + *num_devices_ret = 0; + return CL_INVALID_DEVICE_PARTITION_COUNT; } cl_int diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 8ec7741..df37519 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -393,6 +393,12 @@ cl_get_device_info(cl_device_id device, DECL_STRING_FIELD(OPENCL_C_VERSION, opencl_c_version) DECL_STRING_FIELD(EXTENSIONS, extensions); DECL_STRING_FIELD(BUILT_IN_KERNELS, built_in_kernels) +DECL_FIELD(PARENT_DEVICE, parent_device) +DECL_FIELD(PARTITION_MAX_SUB_DEVICES, partition_max_sub_device) +DECL_FIELD(PARTITION_PROPERTIES, partition_property) +DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain) +DECL_FIELD(PARTITION_TYPE, partition_type) +DECL_FIELD(REFERENCE_COUNT, device_reference_count) case CL_DRIVER_VERSION: if (param_value_size_ret) { diff --git a/src/cl_device_id.h b/src/cl_device_id.h index 2bbe98e..a5449a7 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -98,6 +98,13 @@ struct _cl_device_id { /* Kernel specific info that we're assigning statically */ size_t wg_sz; size_t preferred_wg_sz_mul; + /* SubDevice specific info */ + cl_device_id parent_device; + cl_uint partition_max_sub_device; + cl_device_partition_property 
partition_property[3]; + cl_device_affinity_domainaffinity_domain; + cl_device_partition_property partition_type[3]; + cl_uint device_reference_count; }; /* Get a device from the given platform */ diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index cab2c58..b8bda5e 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -101,5 +101,10 @@ DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;" DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING) #undef DECL_INFO_STRING - +.parent_device = NULL, +.partition_max_sub_device = 1, +.partition_property = {0}, +.affinity_domain = 0, +.partition_type = {0}, +.device_reference_count = 1, -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH V3] remove the code of saving the llvm bitcode to file, replace it with llvm::Module
From: Luo Save the global LLVMContext and module pointer to GenProgram, delete the module pointer in the destructor. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 33 +++-- backend/src/backend/gen_program.hpp | 4 +- backend/src/backend/program.cpp | 95 - backend/src/backend/program.h | 3 +- backend/src/backend/program.hpp | 2 +- backend/src/llvm/llvm_to_gen.cpp| 16 --- backend/src/llvm/llvm_to_gen.hpp| 2 +- src/cl_program.c| 2 +- 8 files changed, 100 insertions(+), 57 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index d2e95d4..33f2ed6 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -22,6 +22,17 @@ * \author Benjamin Segovia */ +#include "llvm/Config/config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DataLayout.h" +#else +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DataLayout.h" +#endif /* LLVM_VERSION_MINOR <= 2 */ + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -33,6 +44,8 @@ #include "ir/unit.hpp" #include "llvm/llvm_to_gen.hpp" +#include + #include #include #include @@ -74,7 +87,19 @@ namespace gbe { #endif } - GenProgram::~GenProgram(void) {} + GenProgram::~GenProgram(void){ +#ifdef GBE_COMPILER_AVAILABLE +if(module){ + delete (llvm::Module*)module; + module = NULL; +} + +if(llvm_ctx){ + delete (llvm::LLVMContext*)llvm_ctx; + llvm_ctx = NULL; +} +#endif + } /*! 
We must avoid spilling at all cost with Gen */ static const struct CodeGenStrategy { @@ -182,17 +207,19 @@ namespace gbe { static gbe_program genProgramNewFromLLVM(uint32_t deviceID, const char *fileName, + const void* module, + const void* llvm_ctx, size_t stringSize, char *err, size_t *errSize, int optLevel) { using namespace gbe; -GenProgram *program = GBE_NEW(GenProgram, deviceID); +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx); #ifdef GBE_COMPILER_AVAILABLE std::string error; // Try to compile the program -if (program->buildFromLLVMFile(fileName, error, optLevel) == false) { +if (program->buildFromLLVMFile(fileName, module, error, optLevel) == false) { if (err != NULL && errSize != NULL && stringSize > 0u) { const size_t msgSize = std::min(error.size(), stringSize-1u); std::memcpy(err, error.c_str(), msgSize); diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp index ea54b49..70794c9 100644 --- a/backend/src/backend/gen_program.hpp +++ b/backend/src/backend/gen_program.hpp @@ -58,7 +58,7 @@ namespace gbe { public: /*! Create an empty program */ -GenProgram(uint32_t deviceID) : deviceID(deviceID) {} +GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {} /*! Current device ID*/ uint32_t deviceID; /*! Destroy the program */ @@ -69,6 +69,8 @@ namespace gbe virtual Kernel *allocateKernel(const std::string &name) { return GBE_NEW(GenKernel, name); } +void* module; +void* llvm_ctx; /*! 
Use custom allocators */ GBE_CLASS(GenProgram); }; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 949aeb4..6e50761 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -34,6 +34,8 @@ #include "llvm/Config/config.h" #include "llvm/Support/Threading.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IR/LLVMContext.h" #include #include #include @@ -103,9 +105,13 @@ namespace gbe { #ifdef GBE_COMPILER_AVAILABLE BVAR(OCL_OUTPUT_GEN_IR, false); - bool Program::buildFromLLVMFile(const char *fileName, std::string &error, int optLevel) { + bool Program::buildFromLLVMFile(const char *fileName, const void* module, std::string &error, int optLevel) { ir::Unit *unit = new ir::Unit(); -if (llvmToGen(*unit, fileName, optLevel) == false) { +llvm::Module * cloned_module = NULL; +if(module){ + cloned_module = llvm::CloneModule((llvm::Module*)module); +} +if (llvmToGen(*unit, fileName, module, optLevel) == false) { error = std::string(fileName) + " not found"; return false; } @@ -114,11 +120,18 @@ namespace gbe { if(!unit->getValid()) {
[Beignet] [PATCH V2 0/3] opencl-1.2 compile/link implementation.
From: Luo 1. this patchset depends on the patch from master called "remove the code of saving the llvm bitcode to file, replace it with llvm::Module"; 2. clBuildProgram path is independent of clCompileProgram/clLinkProgram. clBuildProgram allocates new LLVMContext, compile/link use the global LLVMContext. Luo (3): add [opencl-1.2] API clCompileProgram. add [opencl-1.2] API clLinkProgram. add [opencl-1.2] test case runtime_cl. backend/src/backend/gen_program.cpp | 120 backend/src/backend/program.cpp | 178 backend/src/backend/program.h | 38 kernels/multi2.h| 1 + kernels/multi_A.cl | 13 +++ kernels/multi_B.cl | 9 ++ kernels/mydir/multi3.h | 4 + src/cl_api.c| 74 +++ src/cl_gbe_loader.cpp | 16 src/cl_program.c| 138 src/cl_program.h| 15 ++- utests/CMakeLists.txt | 1 + utests/runtime_compile_link.cpp | 127 + 13 files changed, 733 insertions(+), 1 deletion(-) create mode 100644 kernels/multi2.h create mode 100644 kernels/multi_A.cl create mode 100644 kernels/multi_B.cl create mode 100644 kernels/mydir/multi3.h create mode 100644 utests/runtime_compile_link.cpp -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH v4 1/3] add [opencl-1.2] API clCompileProgram.
From: Luo This API compiles a program's source for all the devices or a specific device in the OpenCL context associated with program. The pre-processor runs before the program sources are compiled. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 9 backend/src/backend/program.cpp | 60 +++ backend/src/backend/program.h | 20 src/cl_api.c| 41 src/cl_gbe_loader.cpp | 4 ++ src/cl_program.c| 96 + src/cl_program.h| 8 +++- 7 files changed, 237 insertions(+), 1 deletion(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 33f2ed6..1d19289 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -232,6 +232,14 @@ namespace gbe { // Everything run fine return (gbe_program) program; } + + static gbe_program genProgramNewGenProgram(uint32_t deviceID, const + void* module, const void* llvm_ctx) { +using namespace gbe; +GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx); +// Everything run fine +return (gbe_program) program; + } } /* namespace gbe */ void genSetupCallBacks(void) @@ -239,4 +247,5 @@ void genSetupCallBacks(void) gbe_program_new_from_binary = gbe::genProgramNewFromBinary; gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary; gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM; + gbe_program_new_gen_program = gbe::genProgramNewGenProgram; } diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index d23529a..121153d 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -861,6 +861,52 @@ namespace gbe { } #endif +#ifdef GBE_COMPILER_AVAILABLE + + static gbe_program programCompileFromSource(uint32_t deviceID, + const char *source, + const char *temp_header_path, + size_t stringSize, + const char *options, + char *err, + size_t *errSize) + { +int optLevel = 1; +std::string clOpt; +std::string clName; +processSourceAndOption(source, options, temp_header_path, clOpt, clName, optLevel); + 
+gbe_program p; +acquireLLVMContextLock(); +//FIXME: if use new allocated context to link two modules there would be context mismatch +//for some functions, so we use global context now, need switch to new context later. +llvm::Module * out_module; +llvm::LLVMContext* llvm_ctx = &llvm::getGlobalContext(); +if (buildModuleFromSource(clName.c_str(), &out_module, llvm_ctx, clOpt.c_str(), + stringSize, err, errSize)) { +// Now build the program from llvm + size_t clangErrSize = 0; + if (err != NULL) { +GBE_ASSERT(errSize != NULL); +stringSize -= *errSize; +err += *errSize; +clangErrSize = *errSize; + } + + p = gbe_program_new_gen_program(deviceID, out_module, NULL); + + if (err != NULL) +*errSize += clangErrSize; + if (OCL_OUTPUT_BUILD_LOG && options) +llvm::errs() << options; +} else + p = NULL; +remove(clName.c_str()); +releaseLLVMContextLock(); +return p; + } +#endif + static size_t programGetGlobalConstantSize(gbe_program gbeProgram) { if (gbeProgram == NULL) return 0; const gbe::Program *program = (const gbe::Program*) gbeProgram; @@ -1024,10 +1070,23 @@ namespace gbe { } } /* namespace gbe */ +std::mutex llvm_ctx_mutex; +void acquireLLVMContextLock() +{ + llvm_ctx_mutex.lock(); +} + +void releaseLLVMContextLock() +{ + llvm_ctx_mutex.unlock(); +} + GBE_EXPORT_SYMBOL gbe_program_new_from_source_cb *gbe_program_new_from_source = NULL; +GBE_EXPORT_SYMBOL gbe_program_compile_from_source_cb *gbe_program_compile_from_source = NULL; GBE_EXPORT_SYMBOL gbe_program_new_from_binary_cb *gbe_program_new_from_binary = NULL; GBE_EXPORT_SYMBOL gbe_program_serialize_to_binary_cb *gbe_program_serialize_to_binary = NULL; GBE_EXPORT_SYMBOL gbe_program_new_from_llvm_cb *gbe_program_new_from_llvm = NULL; +GBE_EXPORT_SYMBOL gbe_program_new_gen_program_cb *gbe_program_new_gen_program = NULL; GBE_EXPORT_SYMBOL gbe_program_get_global_constant_size_cb *gbe_program_get_global_constant_size = NULL; GBE_EXPORT_SYMBOL gbe_program_get_global_constant_data_cb *gbe_program_get_global_constant_data 
= NULL; GBE_EXPORT_SYMBOL gbe_program_delete_cb *gbe_program_delete = NULL; @@ -1067,6 +1126,7 @@ namespace gbe { CallBackInitializer(void) { gbe_program_new_from_source = gbe::programNewFromSource; + gbe_program_compile_from_source = gbe::programCompileFromSource
[Beignet] [PATCH v4 3/3] add [opencl-1.2] test case runtime_compile_link.
From: Luo Signed-off-by: Luo --- kernels/multi2.h| 1 + kernels/multi_A.cl | 13 kernels/multi_B.cl | 9 +++ kernels/mydir/multi3.h | 4 ++ utests/CMakeLists.txt | 1 + utests/runtime_compile_link.cpp | 127 6 files changed, 155 insertions(+) create mode 100644 kernels/multi2.h create mode 100644 kernels/multi_A.cl create mode 100644 kernels/multi_B.cl create mode 100644 kernels/mydir/multi3.h create mode 100644 utests/runtime_compile_link.cpp diff --git a/kernels/multi2.h b/kernels/multi2.h new file mode 100644 index 000..ae2c56e --- /dev/null +++ b/kernels/multi2.h @@ -0,0 +1 @@ +int comp_long(long x, long y); diff --git a/kernels/multi_A.cl b/kernels/multi_A.cl new file mode 100644 index 000..9282b8d --- /dev/null +++ b/kernels/multi_A.cl @@ -0,0 +1,13 @@ +#include "multi2.h" +#include "mydir/multi3.h" + +int comp_long(long x, long y) +{ + return x < y ; +} + +kernel void multi_A(global long *src1, global long *src2, global long *dst) { + int i = get_global_id(0); + int j = comp_long(src1[i], src2[i]); + dst[i] = j ? 3 : 4; +} diff --git a/kernels/multi_B.cl b/kernels/multi_B.cl new file mode 100644 index 000..de147eb --- /dev/null +++ b/kernels/multi_B.cl @@ -0,0 +1,9 @@ +#include "multi2.h" +#include "mydir/multi3.h" + +kernel void multi_B(global long *src1, global long *src2, global long *dst) { + int i = get_global_id(0); + int j = comp_long(src1[i], src2[i]); + dst[i] = j ? 
3 : 4; + int k = greater(src1[i], src2[i]); +} diff --git a/kernels/mydir/multi3.h b/kernels/mydir/multi3.h new file mode 100644 index 000..4011278 --- /dev/null +++ b/kernels/mydir/multi3.h @@ -0,0 +1,4 @@ +inline int greater(long x, long y) +{ + return x > y ; +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 698c9ff..bee3e8f 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -157,6 +157,7 @@ set (utests_sources runtime_event.cpp runtime_barrier_list.cpp runtime_marker_list.cpp + runtime_compile_link.cpp compiler_double.cpp compiler_double_2.cpp compiler_double_3.cpp diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp new file mode 100644 index 000..17fe413 --- /dev/null +++ b/utests/runtime_compile_link.cpp @@ -0,0 +1,127 @@ +#include +#include +#include +#include "utest_helper.hpp" +#include "utest_file_map.hpp" + +#define BUFFERSIZE 32*1024 + +int init_program(const char* name, cl_context ctx, cl_program *pg ) +{ + cl_int err; + char* ker_path = cl_do_kiss_path(name, device); + + cl_file_map_t *fm = cl_file_map_new(); + err = cl_file_map_open(fm, ker_path); + if(err != CL_FILE_MAP_SUCCESS) +OCL_ASSERT(0); + const char *src = cl_file_map_begin(fm); + + *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err); + free(ker_path); + cl_file_map_delete(fm); + return 0; + +} + +void runtime_compile_link(void) +{ + + cl_int err; + + const char* header_file_name="multi2.h"; + cl_program foo_pg; + init_program(header_file_name, ctx, &foo_pg); + + const char* myinc_file_name="mydir/multi3.h"; + cl_program myinc_pg; + init_program(myinc_file_name, ctx, &myinc_pg); + + const char* file_name_A="multi_A.cl"; + cl_program program_A; + init_program(file_name_A, ctx, &program_A); + + cl_program input_headers[2] = { foo_pg, myinc_pg}; + const char * input_header_names[2] = { "multi2.h", "mydir/multi3.h"}; + + err = clCompileProgram(program_A, +0, NULL, // num_devices & device_list +NULL, // compile_options +2, // 
num_input_headers +input_headers, +input_header_names, +NULL, NULL); + + OCL_ASSERT(err==CL_SUCCESS); + const char* file_name_B="multi_B.cl"; + cl_program program_B; + init_program(file_name_B, ctx, &program_B); + + err = clCompileProgram(program_B, +0, NULL, // num_devices & device_list +NULL, // compile_options +2, // num_input_headers +input_headers, +input_header_names, +NULL, NULL); + + OCL_ASSERT(err==CL_SUCCESS); + cl_program input_programs[2] = { program_A, program_B}; + cl_program linked_program = clLinkProgram(ctx, 0, NULL, NULL, 2, input_programs, NULL, NULL, &err); + + + OCL_ASSERT(linked_program != NULL); + OCL_ASSERT(err == CL_SUCCESS); + + // link success, run this kernel. + + const size_t n = 16; + int64_t src1[n], src2[n]; + + src1[0] = (int64_t)1 << 63, src2[0] = 0x7FFFll; + src1[1] = (int64_t)1 << 63, src2[1] = ((int64_t)1 << 63) | 1; + src1[2] = -1ll, src2[2] = 0; + src1[3] = ((int64_t)123 << 32) | 0x7FFF, src2[3] = ((int64_t)123 << 32) | 0x8
[Beignet] [PATCH v4 2/3] add [opencl-1.2] API clLinkProgram.
From: Luo this API links a set of compiled program objects and libraries for all the devices or a specific device(s) in the OpenCL context and creates an executable. the llvm bitcode in the compiled program objects are linked together and built to Gen binary. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 88 - backend/src/backend/program.cpp | 28 +--- backend/src/backend/program.h | 28 src/cl_api.c| 33 ++ src/cl_gbe_loader.cpp | 12 + src/cl_program.c| 49 + src/cl_program.h| 7 +++ 7 files changed, 238 insertions(+), 7 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 1d19289..ad9043b 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -33,6 +33,9 @@ #include "llvm/IR/DataLayout.h" #endif /* LLVM_VERSION_MINOR <= 2 */ +#include "llvm/Linker.h" +#include "llvm/Transforms/Utils/Cloning.h" + #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" @@ -51,6 +54,7 @@ #include #include #include +#include namespace gbe { @@ -233,13 +237,91 @@ namespace gbe { return (gbe_program) program; } - static gbe_program genProgramNewGenProgram(uint32_t deviceID, const - void* module, const void* llvm_ctx) { + static gbe_program genProgramNewGenProgram(uint32_t deviceID, const void* module, const void* llvm_ctx) + { using namespace gbe; GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx); // Everything run fine return (gbe_program) program; } + + static void genProgramLinkFromLLVM(gbe_program dst_program, + gbe_program src_program, + size_tstringSize, + char *err, + size_t * errSize) + { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string errMsg; +if(((GenProgram*)dst_program)->module == NULL){ + ((GenProgram*)dst_program)->module = llvm::CloneModule((llvm::Module*)((GenProgram*)src_program)->module); + errSize = 0; +}else{ + //set the global variables and functions to link once to fix redefine. 
+ llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module; + for (llvm::Module::global_iterator I = src->global_begin(), E = src->global_end(); I != E; ++I) { +I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + } + + for (llvm::Module::iterator I = src->begin(), E = src->end(); I != E; ++I) { +I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + } + + llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module; + llvm::Linker::LinkModules( dst, + src, + llvm::Linker::PreserveSource, + &errMsg); + if (errMsg.c_str() != NULL) { +if (err != NULL && errSize != NULL && stringSize > 0u) { + if(errMsg.length() < stringSize ) +stringSize = errMsg.length(); + strcpy(err, errMsg.c_str()); + err[stringSize+1] = '\0'; +} + } +} +// Everything run fine +#endif + } + + static void genProgramBuildFromLLVM(gbe_program program, + size_t stringSize, + char *err, + size_t *errSize, + const char * options) + { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string error; + +int optLevel = 1; + +if(options) { + char *p; + p = strstr(const_cast(options), "-cl-opt-disable"); + if (p) +optLevel = 0; +} + +GenProgram* p = (GenProgram*) program; +// Try to compile the program +acquireLLVMContextLock(); +llvm::Module* module = (llvm::Module*)p->module; + +if (p->buildFromLLVMFile(NULL, module, error, optLevel) == false) { + if (err != NULL && errSize != NULL && stringSize > 0u) { +const size_t msgSize = std::min(error.size(), stringSize-1u); +std::memcpy(err, error.c_str(), msgSize); +*errSize = error.size(); + } + GBE_DELETE(p); +} +releaseLLVMContextLock(); +#endif + } + } /* namespace gbe */ void genSetupCallBacks(void) @@ -248,4 +330,6 @@ void genSetupCallBacks(void) gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary; gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM; gbe_program_new_gen_program = gbe::genProgramNewGenProgram; + gbe_program_link_from_llvm = gbe::genProgramLinkFromLLVM; + gbe_program_build_from_llvm = gbe::
[Beignet] [PATCH v5 3/3] add [opencl-1.2] test case runtime_compile_link.
From: Luo Signed-off-by: Luo --- kernels/include/runtime_compile_link_inc.h | 4 + kernels/runtime_compile_link.h | 1 + kernels/runtime_compile_link_a.cl | 13 +++ kernels/runtime_compile_link_b.cl | 9 ++ utests/CMakeLists.txt | 1 + utests/runtime_compile_link.cpp| 127 + 6 files changed, 155 insertions(+) create mode 100644 kernels/include/runtime_compile_link_inc.h create mode 100644 kernels/runtime_compile_link.h create mode 100644 kernels/runtime_compile_link_a.cl create mode 100644 kernels/runtime_compile_link_b.cl create mode 100644 utests/runtime_compile_link.cpp diff --git a/kernels/include/runtime_compile_link_inc.h b/kernels/include/runtime_compile_link_inc.h new file mode 100644 index 000..4011278 --- /dev/null +++ b/kernels/include/runtime_compile_link_inc.h @@ -0,0 +1,4 @@ +inline int greater(long x, long y) +{ + return x > y ; +} diff --git a/kernels/runtime_compile_link.h b/kernels/runtime_compile_link.h new file mode 100644 index 000..ae2c56e --- /dev/null +++ b/kernels/runtime_compile_link.h @@ -0,0 +1 @@ +int comp_long(long x, long y); diff --git a/kernels/runtime_compile_link_a.cl b/kernels/runtime_compile_link_a.cl new file mode 100644 index 000..b17861f --- /dev/null +++ b/kernels/runtime_compile_link_a.cl @@ -0,0 +1,13 @@ +#include "runtime_compile_link.h" +#include "include/runtime_compile_link_inc.h" + +int comp_long(long x, long y) +{ + return x < y ; +} + +kernel void runtime_compile_link_a(global long *src1, global long *src2, global long *dst) { + int i = get_global_id(0); + int j = comp_long(src1[i], src2[i]); + dst[i] = j ? 
3 : 4; +} diff --git a/kernels/runtime_compile_link_b.cl b/kernels/runtime_compile_link_b.cl new file mode 100644 index 000..89b5a2d --- /dev/null +++ b/kernels/runtime_compile_link_b.cl @@ -0,0 +1,9 @@ +#include "runtime_compile_link.h" +#include "include/runtime_compile_link_inc.h" + +kernel void runtime_compile_link_b(global long *src1, global long *src2, global long *dst) { + int i = get_global_id(0); + int j = comp_long(src1[i], src2[i]); + dst[i] = j ? 3 : 4; + int k = greater(src1[i], src2[i]); +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 698c9ff..bee3e8f 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -157,6 +157,7 @@ set (utests_sources runtime_event.cpp runtime_barrier_list.cpp runtime_marker_list.cpp + runtime_compile_link.cpp compiler_double.cpp compiler_double_2.cpp compiler_double_3.cpp diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp new file mode 100644 index 000..8aeea31 --- /dev/null +++ b/utests/runtime_compile_link.cpp @@ -0,0 +1,127 @@ +#include +#include +#include +#include "utest_helper.hpp" +#include "utest_file_map.hpp" + +#define BUFFERSIZE 32*1024 + +int init_program(const char* name, cl_context ctx, cl_program *pg ) +{ + cl_int err; + char* ker_path = cl_do_kiss_path(name, device); + + cl_file_map_t *fm = cl_file_map_new(); + err = cl_file_map_open(fm, ker_path); + if(err != CL_FILE_MAP_SUCCESS) +OCL_ASSERT(0); + const char *src = cl_file_map_begin(fm); + + *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err); + free(ker_path); + cl_file_map_delete(fm); + return 0; + +} + +void runtime_compile_link(void) +{ + + cl_int err; + + const char* header_file_name="runtime_compile_link.h"; + cl_program foo_pg; + init_program(header_file_name, ctx, &foo_pg); + + const char* myinc_file_name="include/runtime_compile_link_inc.h"; + cl_program myinc_pg; + init_program(myinc_file_name, ctx, &myinc_pg); + + const char* file_name_A="runtime_compile_link_a.cl"; + cl_program 
program_A; + init_program(file_name_A, ctx, &program_A); + + cl_program input_headers[2] = { foo_pg, myinc_pg}; + const char * input_header_names[2] = {header_file_name, myinc_file_name}; + + err = clCompileProgram(program_A, +0, NULL, // num_devices & device_list +NULL, // compile_options +2, // num_input_headers +input_headers, +input_header_names, +NULL, NULL); + + OCL_ASSERT(err==CL_SUCCESS); + const char* file_name_B="runtime_compile_link_b.cl"; + cl_program program_B; + init_program(file_name_B, ctx, &program_B); + + err = clCompileProgram(program_B, +0, NULL, // num_devices & device_list +NULL, // compile_options +2, // num_input_headers +input_headers, +input_header_names, +NULL, NULL); + + OCL_ASSERT(err==CL_SUCCESS); + cl_program input_programs[2] = { program_A, program_B}; + cl_program linked_program = clLinkProgr
[Beignet] [PATCH] [opencl-1.2] add binary type support for compiled object and library.
From: Luo save the llvm bitcode to program->binary: insert a byte in front of the bitcode that stands for binary type(1 means COMPILED_OBJECT, 2 means LIBRARY); load the binary to module by ParseIR. create random directory to save compile header files. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 77 + backend/src/backend/program.cpp | 1 + backend/src/backend/program.h | 8 +++- src/cl_api.c| 25 ++-- src/cl_gbe_loader.cpp | 4 ++ src/cl_khr_icd.c| 4 +- src/cl_program.c| 67 ++-- src/cl_program.h| 1 + 8 files changed, 170 insertions(+), 17 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index ad9043b..5324b8c 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -35,6 +35,12 @@ #include "llvm/Linker.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/IRReader/IRReader.h" #include "backend/program.h" #include "backend/gen_program.h" @@ -55,6 +61,7 @@ #include #include #include +#include namespace gbe { @@ -193,20 +200,75 @@ namespace gbe { return reinterpret_cast(program); } - static size_t genProgramSerializeToBinary(gbe_program program, char **binary) { + static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string binary_content; +//the first bit stands for binary_type. 
+binary_content.assign(binary+1, size-1); +llvm::StringRef llvm_bin_str(binary_content); +llvm::LLVMContext& c = llvm::getGlobalContext(); +llvm::SMDiagnostic Err; +llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str"); +llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c); + +GenProgram *program = GBE_NEW(GenProgram, deviceID, module); + +//program->printStatus(0, std::cout); +return reinterpret_cast(program); +#else + return NULL; +#endif + } + + static size_t genProgramSerializeToBinary(gbe_program program, char **binary, int binary_type) { using namespace gbe; size_t sz; std::ostringstream oss; GenProgram *prog = (GenProgram*)program; -if ((sz = prog->serializeToBin(oss)) == 0) { - *binary = 0; +//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM bitcode library +if(binary_type == 0){ + if ((sz = prog->serializeToBin(oss)) == 0) { +*binary = 0; +return 0; + } + + *binary = (char *)malloc(sizeof(char) * sz); + memcpy(*binary, oss.str().c_str(), sz*sizeof(char)); + return sz; +}else{ +#ifdef GBE_COMPILER_AVAILABLE + char llStr[] = "/tmp/XX.ll"; + int llFd = mkstemps(llStr, 3); + close(llFd); + const std::string llName = std::string(llStr); + std::string errorInfo; +#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR > 3) + auto mode = llvm::sys::fs::F_Binary; +#else + auto mode = llvm::raw_fd_ostream::F_Binary; +#endif + llvm::raw_fd_ostream OS(llName.c_str(), errorInfo, mode); + llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS); + OS.close(); + FILE* pfile = fopen(llName.c_str(), "rb"); + fseek(pfile, 0, SEEK_END); + int llsz = ftell(pfile); + rewind(pfile); + *binary = (char *)malloc(sizeof(char) * (llsz+1) ); + int result = fread(*binary+1, 1, llsz, pfile); + if(result != llsz){ +GBE_ASSERT(0); + } + *(*binary) = binary_type; + fclose(pfile); + remove(llName.c_str()); + return llsz+1; +#else return 0; +#endif } - -*binary = (char *)malloc(sizeof(char) * sz); -memcpy(*binary, 
oss.str().c_str(), sz*sizeof(char)); -return sz; } static gbe_program genProgramNewFromLLVM(uint32_t deviceID, @@ -327,6 +389,7 @@ namespace gbe { void genSetupCallBacks(void) { gbe_program_new_from_binary = gbe::genProgramNewFromBinary; + gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary; gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary; gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM; gbe_program_new_gen_program = gbe::genProgramNewGenProgram; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index b4c56b7..ebaf3d3 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -1099,6 +1099,7 @@ GBE_EXPORT_SYMBOL gbe_program_new_from_source_cb *gbe_program_new_from_source = GBE_EXPORT_SYMBOL gbe_program_compile_from_source_cb *gbe_program_compile_from_s
[Beignet] [PATCH] add binary type support for compiled object and library.
From: Luo save the llvm bitcode to program->binary: insert a byte in front of the bitcode that stands for binary type(1 means COMPILED_OBJECT, 2 means LIBRARY); load the binary to module by ParseIR. create random directory to save compile header files. use strncpy and strncat to replace strcpy and strcat. Signed-off-by: Luo Conflicts: src/cl_api.c src/cl_gbe_loader.cpp src/cl_khr_icd.c src/cl_program.c --- backend/src/backend/gen_program.cpp | 77 + backend/src/backend/program.cpp | 1 + backend/src/backend/program.h | 8 +++- src/cl_api.c| 25 ++-- src/cl_gbe_loader.cpp | 25 +++- src/cl_gbe_loader.h | 10 +++-- src/cl_program.c| 71 ++ src/cl_program.h| 1 + 8 files changed, 185 insertions(+), 33 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 300741e..2ef8307 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -35,6 +35,12 @@ #include "llvm/Linker.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/IRReader/IRReader.h" #include "backend/program.h" #include "backend/gen_program.h" @@ -55,6 +61,7 @@ #include #include #include +#include namespace gbe { @@ -203,20 +210,75 @@ namespace gbe { return reinterpret_cast(program); } - static size_t genProgramSerializeToBinary(gbe_program program, char **binary) { + static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string binary_content; +//the first bit stands for binary_type. 
+binary_content.assign(binary+1, size-1); +llvm::StringRef llvm_bin_str(binary_content); +llvm::LLVMContext& c = llvm::getGlobalContext(); +llvm::SMDiagnostic Err; +llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str"); +llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c); + +GenProgram *program = GBE_NEW(GenProgram, deviceID, module); + +//program->printStatus(0, std::cout); +return reinterpret_cast(program); +#else + return NULL; +#endif + } + + static size_t genProgramSerializeToBinary(gbe_program program, char **binary, int binary_type) { using namespace gbe; size_t sz; std::ostringstream oss; GenProgram *prog = (GenProgram*)program; -if ((sz = prog->serializeToBin(oss)) == 0) { - *binary = 0; +//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM bitcode library +if(binary_type == 0){ + if ((sz = prog->serializeToBin(oss)) == 0) { +*binary = 0; +return 0; + } + + *binary = (char *)malloc(sizeof(char) * sz); + memcpy(*binary, oss.str().c_str(), sz*sizeof(char)); + return sz; +}else{ +#ifdef GBE_COMPILER_AVAILABLE + char llStr[] = "/tmp/XX.ll"; + int llFd = mkstemps(llStr, 3); + close(llFd); + const std::string llName = std::string(llStr); + std::string errorInfo; +#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR > 3) + auto mode = llvm::sys::fs::F_Binary; +#else + auto mode = llvm::raw_fd_ostream::F_Binary; +#endif + llvm::raw_fd_ostream OS(llName.c_str(), errorInfo, mode); + llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS); + OS.close(); + FILE* pfile = fopen(llName.c_str(), "rb"); + fseek(pfile, 0, SEEK_END); + int llsz = ftell(pfile); + rewind(pfile); + *binary = (char *)malloc(sizeof(char) * (llsz+1) ); + int result = fread(*binary+1, 1, llsz, pfile); + if(result != llsz){ +GBE_ASSERT(0); + } + *(*binary) = binary_type; + fclose(pfile); + remove(llName.c_str()); + return llsz+1; +#else return 0; +#endif } - -*binary = (char *)malloc(sizeof(char) * sz); -memcpy(*binary, 
oss.str().c_str(), sz*sizeof(char)); -return sz; } static gbe_program genProgramNewFromLLVM(uint32_t deviceID, @@ -337,6 +399,7 @@ namespace gbe { void genSetupCallBacks(void) { gbe_program_new_from_binary = gbe::genProgramNewFromBinary; + gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary; gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary; gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM; gbe_program_new_gen_program = gbe::genProgramNewGenProgram; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 45983fd..98e7ab7 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@
[Beignet] [PATCH V2] add binary type support for compiled object and library.
From: Luo save the llvm bitcode to program->binary: insert a byte in front of the bitcode stands for binary type(1 means COMPILED_OBJECT, 2 means LIBRARY); load the binary to module by ParseIR. create random directory to save compile header files. use strncpy and strncat to replace strcpy and strcat. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 68 ++- backend/src/backend/program.cpp | 1 + backend/src/backend/program.h | 8 +++-- src/cl_api.c| 25 +++-- src/cl_gbe_loader.cpp | 11 -- src/cl_gbe_loader.h | 10 +++--- src/cl_program.c| 72 + src/cl_program.h| 1 + 8 files changed, 169 insertions(+), 27 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 300741e..b31 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -35,6 +35,12 @@ #include "llvm/Linker.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/IRReader/IRReader.h" #include "backend/program.h" #include "backend/gen_program.h" @@ -55,6 +61,7 @@ #include #include #include +#include namespace gbe { @@ -188,7 +195,7 @@ namespace gbe { static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) { using namespace gbe; std::string binary_content; -binary_content.assign(binary, size); +binary_content.assign(binary+5, size-5); GenProgram *program = GBE_NEW(GenProgram, deviceID); std::istringstream ifs(binary_content, std::ostringstream::binary); // FIXME we need to check the whether the current device ID match the binary file's. 
@@ -203,20 +210,64 @@ namespace gbe { return reinterpret_cast(program); } - static size_t genProgramSerializeToBinary(gbe_program program, char **binary) { + static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string binary_content; +//the first byte stands for binary_type. +binary_content.assign(binary+1, size-1); +llvm::StringRef llvm_bin_str(binary_content); +llvm::LLVMContext& c = llvm::getGlobalContext(); +llvm::SMDiagnostic Err; +llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str"); +llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c); +if(module == NULL){ + GBE_ASSERT(0); +} + +GenProgram *program = GBE_NEW(GenProgram, deviceID, module); + +//program->printStatus(0, std::cout); +return reinterpret_cast(program); +#else + return NULL; +#endif + } + + static size_t genProgramSerializeToBinary(gbe_program program, char **binary, int binary_type) { using namespace gbe; size_t sz; std::ostringstream oss; GenProgram *prog = (GenProgram*)program; -if ((sz = prog->serializeToBin(oss)) == 0) { - *binary = 0; +//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM bitcode library +if(binary_type == 0){ + if ((sz = prog->serializeToBin(oss)) == 0) { +*binary = NULL; +return 0; + } + + //add header to differetiate from llvm bitcode binary. + //the header length is 5 bytes: 1 binary type, 4 bitcode header. 
+ *binary = (char *)malloc(sizeof(char) * (sz+5) ); + memset(*binary, 0, sizeof(char) * (sz+5) ); + memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); + return sz+5; +}else{ +#ifdef GBE_COMPILER_AVAILABLE + std::string str; + llvm::raw_string_ostream OS(str); + llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS); + std::string& bin_str = OS.str(); + int llsz = bin_str.size(); + *binary = (char *)malloc(sizeof(char) * (llsz+1) ); + *(*binary) = binary_type; + memcpy(*binary+1, bin_str.c_str(), llsz); + return llsz+1; +#else return 0; +#endif } - -*binary = (char *)malloc(sizeof(char) * sz); -memcpy(*binary, oss.str().c_str(), sz*sizeof(char)); -return sz; } static gbe_program genProgramNewFromLLVM(uint32_t deviceID, @@ -337,6 +388,7 @@ namespace gbe { void genSetupCallBacks(void) { gbe_program_new_from_binary = gbe::genProgramNewFromBinary; + gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary; gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary; gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM; gbe_program_new_gen_program = gbe::genProgramNewGenProgram; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 45983fd..98e7
[Beignet] [PATCH V2] add binary type support for compiled object and library.
From: Luo save the llvm bitcode to program->binary: insert a byte in front of the bitcode stands for binary type(1 means COMPILED_OBJECT, 2 means LIBRARY); load the binary to module by ParseIR. create random directory to save compile header files. use strncpy and strncat to replace strcpy and strcat. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 68 ++- backend/src/backend/program.cpp | 1 + backend/src/backend/program.h | 8 +++-- src/cl_api.c| 25 +++-- src/cl_gbe_loader.cpp | 11 -- src/cl_gbe_loader.h | 10 +++--- src/cl_program.c| 72 + src/cl_program.h| 1 + 8 files changed, 169 insertions(+), 27 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 300741e..b31 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -35,6 +35,12 @@ #include "llvm/Linker.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/IRReader/IRReader.h" #include "backend/program.h" #include "backend/gen_program.h" @@ -55,6 +61,7 @@ #include #include #include +#include namespace gbe { @@ -188,7 +195,7 @@ namespace gbe { static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) { using namespace gbe; std::string binary_content; -binary_content.assign(binary, size); +binary_content.assign(binary+5, size-5); GenProgram *program = GBE_NEW(GenProgram, deviceID); std::istringstream ifs(binary_content, std::ostringstream::binary); // FIXME we need to check the whether the current device ID match the binary file's. 
@@ -203,20 +210,64 @@ namespace gbe { return reinterpret_cast(program); } - static size_t genProgramSerializeToBinary(gbe_program program, char **binary) { + static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string binary_content; +//the first byte stands for binary_type. +binary_content.assign(binary+1, size-1); +llvm::StringRef llvm_bin_str(binary_content); +llvm::LLVMContext& c = llvm::getGlobalContext(); +llvm::SMDiagnostic Err; +llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str"); +llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c); +if(module == NULL){ + GBE_ASSERT(0); +} + +GenProgram *program = GBE_NEW(GenProgram, deviceID, module); + +//program->printStatus(0, std::cout); +return reinterpret_cast(program); +#else + return NULL; +#endif + } + + static size_t genProgramSerializeToBinary(gbe_program program, char **binary, int binary_type) { using namespace gbe; size_t sz; std::ostringstream oss; GenProgram *prog = (GenProgram*)program; -if ((sz = prog->serializeToBin(oss)) == 0) { - *binary = 0; +//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM bitcode library +if(binary_type == 0){ + if ((sz = prog->serializeToBin(oss)) == 0) { +*binary = NULL; +return 0; + } + + //add header to differetiate from llvm bitcode binary. + //the header length is 5 bytes: 1 binary type, 4 bitcode header. 
+ *binary = (char *)malloc(sizeof(char) * (sz+5) ); + memset(*binary, 0, sizeof(char) * (sz+5) ); + memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); + return sz+5; +}else{ +#ifdef GBE_COMPILER_AVAILABLE + std::string str; + llvm::raw_string_ostream OS(str); + llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS); + std::string& bin_str = OS.str(); + int llsz = bin_str.size(); + *binary = (char *)malloc(sizeof(char) * (llsz+1) ); + *(*binary) = binary_type; + memcpy(*binary+1, bin_str.c_str(), llsz); + return llsz+1; +#else return 0; +#endif } - -*binary = (char *)malloc(sizeof(char) * sz); -memcpy(*binary, oss.str().c_str(), sz*sizeof(char)); -return sz; } static gbe_program genProgramNewFromLLVM(uint32_t deviceID, @@ -337,6 +388,7 @@ namespace gbe { void genSetupCallBacks(void) { gbe_program_new_from_binary = gbe::genProgramNewFromBinary; + gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary; gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary; gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM; gbe_program_new_gen_program = gbe::genProgramNewGenProgram; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 45983fd..98e7
[Beignet] [PATCH V3] add binary type support for compiled object and library.
From: Luo save the llvm bitcode to program->binary: insert a byte in front of the bitcode stands for binary type(0 means GEN binary, 1 means COMPILED_OBJECT, 2 means LIBRARY); load the binary to module by ParseIR. create random directory to save compile header files. use strncpy and strncat to replace strcpy and strcat. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 71 +++- backend/src/backend/program.cpp | 1 + backend/src/backend/program.h | 8 +++-- src/cl_api.c| 25 +++-- src/cl_gbe_loader.cpp | 11 -- src/cl_gbe_loader.h | 10 +++--- src/cl_program.c| 72 + src/cl_program.h| 1 + 8 files changed, 172 insertions(+), 27 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 300741e..8897dbb 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -35,6 +35,12 @@ #include "llvm/Linker.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/IRReader/IRReader.h" #include "backend/program.h" #include "backend/gen_program.h" @@ -55,6 +61,7 @@ #include #include #include +#include namespace gbe { @@ -188,7 +195,8 @@ namespace gbe { static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) { using namespace gbe; std::string binary_content; -binary_content.assign(binary, size); +//the first 5 bytes are header to differentiate from llvm bitcode binary. +binary_content.assign(binary+5, size-5); GenProgram *program = GBE_NEW(GenProgram, deviceID); std::istringstream ifs(binary_content, std::ostringstream::binary); // FIXME we need to check the whether the current device ID match the binary file's. 
@@ -203,20 +211,66 @@ namespace gbe { return reinterpret_cast(program); } - static size_t genProgramSerializeToBinary(gbe_program program, char **binary) { + static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string binary_content; +//the first byte stands for binary_type. +binary_content.assign(binary+1, size-1); +llvm::StringRef llvm_bin_str(binary_content); +llvm::LLVMContext& c = llvm::getGlobalContext(); +llvm::SMDiagnostic Err; +llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str"); +acquireLLVMContextLock(); +llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c); +releaseLLVMContextLock(); +if(module == NULL){ + GBE_ASSERT(0); +} + +GenProgram *program = GBE_NEW(GenProgram, deviceID, module); + +//program->printStatus(0, std::cout); +return reinterpret_cast(program); +#else + return NULL; +#endif + } + + static size_t genProgramSerializeToBinary(gbe_program program, char **binary, int binary_type) { using namespace gbe; size_t sz; std::ostringstream oss; GenProgram *prog = (GenProgram*)program; -if ((sz = prog->serializeToBin(oss)) == 0) { - *binary = 0; +//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM bitcode library +if(binary_type == 0){ + if ((sz = prog->serializeToBin(oss)) == 0) { +*binary = NULL; +return 0; + } + + //add header to differetiate from llvm bitcode binary. + //the header length is 5 bytes: 1 binary type, 4 bitcode header. 
+ *binary = (char *)malloc(sizeof(char) * (sz+5) ); + memset(*binary, 0, sizeof(char) * (sz+5) ); + memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); + return sz+5; +}else{ +#ifdef GBE_COMPILER_AVAILABLE + std::string str; + llvm::raw_string_ostream OS(str); + llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS); + std::string& bin_str = OS.str(); + int llsz = bin_str.size(); + *binary = (char *)malloc(sizeof(char) * (llsz+1) ); + *(*binary) = binary_type; + memcpy(*binary+1, bin_str.c_str(), llsz); + return llsz+1; +#else return 0; +#endif } - -*binary = (char *)malloc(sizeof(char) * sz); -memcpy(*binary, oss.str().c_str(), sz*sizeof(char)); -return sz; } static gbe_program genProgramNewFromLLVM(uint32_t deviceID, @@ -337,6 +391,7 @@ namespace gbe { void genSetupCallBacks(void) { gbe_program_new_from_binary = gbe::genProgramNewFromBinary; + gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary; gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary; gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM
[Beignet] [PATCH] fix clEnqueueMarkerWithWaitList bug when input event is null.
From: Luo --- src/cl_api.c | 5 + src/cl_event.c | 11 --- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 2f287e3..c4a8730 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2992,10 +2992,7 @@ clEnqueueMarkerWithWaitList(cl_command_queue command_queue, { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); - if(event == NULL) { -err = CL_INVALID_VALUE; -goto error; - } + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); diff --git a/src/cl_event.c b/src/cl_event.c index c93d245..76d6760 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -476,11 +476,16 @@ cl_int cl_event_marker_with_wait_list(cl_command_queue queue, cl_event* event) { enqueue_data data = { 0 }; + cl_event e; - *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE); - if(event == NULL) + e = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE); + if(e == NULL) return CL_OUT_OF_HOST_MEMORY; + if(event != NULL ){ +*event = e; + } + //enqueues a marker command which waits for either a list of events to complete, or if the list is //empty it waits for all commands previously enqueued in command_queue to complete before it completes. if(num_events_in_wait_list > 0){ @@ -499,7 +504,7 @@ cl_int cl_event_marker_with_wait_list(cl_command_queue queue, cl_gpgpu_event_update_status(queue->last_event->gpgpu_event, 1); } - cl_event_set_status(*event, CL_COMPLETE); + cl_event_set_status(e, CL_COMPLETE); return CL_SUCCESS; } -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH V4] add binary type support for compiled object and library.
From: Luo save the llvm bitcode to program->binary: insert a byte in front of the bitcode stands for binary type(0 means GEN binary, 1 means COMPILED_OBJECT, 2 means LIBRARY); load the binary to module by ParseIR. create random directory to save compile header files. use strncpy and strncat to replace strcpy and strcat. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 71 +++- backend/src/backend/program.cpp | 1 + backend/src/backend/program.h | 8 +++-- backend/src/gbe_bin_generater.cpp | 15 +++- src/cl_api.c| 25 +++-- src/cl_gbe_loader.cpp | 11 -- src/cl_gbe_loader.h | 10 +++--- src/cl_program.c| 72 + src/cl_program.h| 1 + 9 files changed, 186 insertions(+), 28 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 300741e..8897dbb 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -35,6 +35,12 @@ #include "llvm/Linker.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/IRReader/IRReader.h" #include "backend/program.h" #include "backend/gen_program.h" @@ -55,6 +61,7 @@ #include #include #include +#include namespace gbe { @@ -188,7 +195,8 @@ namespace gbe { static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) { using namespace gbe; std::string binary_content; -binary_content.assign(binary, size); +//the first 5 bytes are header to differentiate from llvm bitcode binary. +binary_content.assign(binary+5, size-5); GenProgram *program = GBE_NEW(GenProgram, deviceID); std::istringstream ifs(binary_content, std::ostringstream::binary); // FIXME we need to check the whether the current device ID match the binary file's. 
@@ -203,20 +211,66 @@ namespace gbe { return reinterpret_cast(program); } - static size_t genProgramSerializeToBinary(gbe_program program, char **binary) { + static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string binary_content; +//the first byte stands for binary_type. +binary_content.assign(binary+1, size-1); +llvm::StringRef llvm_bin_str(binary_content); +llvm::LLVMContext& c = llvm::getGlobalContext(); +llvm::SMDiagnostic Err; +llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str"); +acquireLLVMContextLock(); +llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c); +releaseLLVMContextLock(); +if(module == NULL){ + GBE_ASSERT(0); +} + +GenProgram *program = GBE_NEW(GenProgram, deviceID, module); + +//program->printStatus(0, std::cout); +return reinterpret_cast(program); +#else + return NULL; +#endif + } + + static size_t genProgramSerializeToBinary(gbe_program program, char **binary, int binary_type) { using namespace gbe; size_t sz; std::ostringstream oss; GenProgram *prog = (GenProgram*)program; -if ((sz = prog->serializeToBin(oss)) == 0) { - *binary = 0; +//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM bitcode library +if(binary_type == 0){ + if ((sz = prog->serializeToBin(oss)) == 0) { +*binary = NULL; +return 0; + } + + //add header to differetiate from llvm bitcode binary. + //the header length is 5 bytes: 1 binary type, 4 bitcode header. 
+ *binary = (char *)malloc(sizeof(char) * (sz+5) ); + memset(*binary, 0, sizeof(char) * (sz+5) ); + memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); + return sz+5; +}else{ +#ifdef GBE_COMPILER_AVAILABLE + std::string str; + llvm::raw_string_ostream OS(str); + llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS); + std::string& bin_str = OS.str(); + int llsz = bin_str.size(); + *binary = (char *)malloc(sizeof(char) * (llsz+1) ); + *(*binary) = binary_type; + memcpy(*binary+1, bin_str.c_str(), llsz); + return llsz+1; +#else return 0; +#endif } - -*binary = (char *)malloc(sizeof(char) * sz); -memcpy(*binary, oss.str().c_str(), sz*sizeof(char)); -return sz; } static gbe_program genProgramNewFromLLVM(uint32_t deviceID, @@ -337,6 +391,7 @@ namespace gbe { void genSetupCallBacks(void) { gbe_program_new_from_binary = gbe::genProgramNewFromBinary; + gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary; gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary; gbe
[Beignet] [PATCH V5] add binary type support for compiled object and library.
From: Luo save the llvm bitcode to program->binary: insert a byte in front of the bitcode stands for binary type(0 means GEN binary, 1 means COMPILED_OBJECT, 2 means LIBRARY); load the binary to module by ParseIR. create random directory to save compile header files. use strncpy and strncat to replace strcpy and strcat. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 71 +++- backend/src/backend/program.cpp | 1 + backend/src/backend/program.h | 8 +++-- backend/src/gbe_bin_generater.cpp | 16 - src/cl_api.c| 25 +++-- src/cl_gbe_loader.cpp | 11 -- src/cl_gbe_loader.h | 10 +++--- src/cl_program.c| 72 + src/cl_program.h| 1 + 9 files changed, 187 insertions(+), 28 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 300741e..8897dbb 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -35,6 +35,12 @@ #include "llvm/Linker.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/IRReader/IRReader.h" #include "backend/program.h" #include "backend/gen_program.h" @@ -55,6 +61,7 @@ #include #include #include +#include namespace gbe { @@ -188,7 +195,8 @@ namespace gbe { static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) { using namespace gbe; std::string binary_content; -binary_content.assign(binary, size); +//the first 5 bytes are header to differentiate from llvm bitcode binary. +binary_content.assign(binary+5, size-5); GenProgram *program = GBE_NEW(GenProgram, deviceID); std::istringstream ifs(binary_content, std::ostringstream::binary); // FIXME we need to check the whether the current device ID match the binary file's. 
@@ -203,20 +211,66 @@ namespace gbe { return reinterpret_cast(program); } - static size_t genProgramSerializeToBinary(gbe_program program, char **binary) { + static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string binary_content; +//the first byte stands for binary_type. +binary_content.assign(binary+1, size-1); +llvm::StringRef llvm_bin_str(binary_content); +llvm::LLVMContext& c = llvm::getGlobalContext(); +llvm::SMDiagnostic Err; +llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str"); +acquireLLVMContextLock(); +llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c); +releaseLLVMContextLock(); +if(module == NULL){ + GBE_ASSERT(0); +} + +GenProgram *program = GBE_NEW(GenProgram, deviceID, module); + +//program->printStatus(0, std::cout); +return reinterpret_cast(program); +#else + return NULL; +#endif + } + + static size_t genProgramSerializeToBinary(gbe_program program, char **binary, int binary_type) { using namespace gbe; size_t sz; std::ostringstream oss; GenProgram *prog = (GenProgram*)program; -if ((sz = prog->serializeToBin(oss)) == 0) { - *binary = 0; +//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM bitcode library +if(binary_type == 0){ + if ((sz = prog->serializeToBin(oss)) == 0) { +*binary = NULL; +return 0; + } + + //add header to differetiate from llvm bitcode binary. + //the header length is 5 bytes: 1 binary type, 4 bitcode header. 
+ *binary = (char *)malloc(sizeof(char) * (sz+5) ); + memset(*binary, 0, sizeof(char) * (sz+5) ); + memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); + return sz+5; +}else{ +#ifdef GBE_COMPILER_AVAILABLE + std::string str; + llvm::raw_string_ostream OS(str); + llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS); + std::string& bin_str = OS.str(); + int llsz = bin_str.size(); + *binary = (char *)malloc(sizeof(char) * (llsz+1) ); + *(*binary) = binary_type; + memcpy(*binary+1, bin_str.c_str(), llsz); + return llsz+1; +#else return 0; +#endif } - -*binary = (char *)malloc(sizeof(char) * sz); -memcpy(*binary, oss.str().c_str(), sz*sizeof(char)); -return sz; } static gbe_program genProgramNewFromLLVM(uint32_t deviceID, @@ -337,6 +391,7 @@ namespace gbe { void genSetupCallBacks(void) { gbe_program_new_from_binary = gbe::genProgramNewFromBinary; + gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary; gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary; gb
[Beignet] [PATCH v6] add binary type support for compiled object and library.
From: Luo save the llvm bitcode to program->binary: insert a byte in front of the bitcode stands for binary type(0 means GEN binary, 1 means COMPILED_OBJECT, 2 means LIBRARY); load the binary to module by ParseIR. create random directory to save compile header files. use strncpy and strncat to replace strcpy and strcat. v6: fix enqueue_copy_fill bug, use '\0' instead of 0 in the header. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 71 +++- backend/src/backend/program.cpp | 1 + backend/src/backend/program.h | 8 +++-- backend/src/gbe_bin_generater.cpp | 20 ++- src/cl_api.c| 25 +++-- src/cl_gbe_loader.cpp | 11 -- src/cl_gbe_loader.h | 10 +++--- src/cl_program.c| 72 + src/cl_program.h| 1 + 9 files changed, 191 insertions(+), 28 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 300741e..8897dbb 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -35,6 +35,12 @@ #include "llvm/Linker.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/IRReader/IRReader.h" #include "backend/program.h" #include "backend/gen_program.h" @@ -55,6 +61,7 @@ #include #include #include +#include namespace gbe { @@ -188,7 +195,8 @@ namespace gbe { static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) { using namespace gbe; std::string binary_content; -binary_content.assign(binary, size); +//the first 5 bytes are header to differentiate from llvm bitcode binary. +binary_content.assign(binary+5, size-5); GenProgram *program = GBE_NEW(GenProgram, deviceID); std::istringstream ifs(binary_content, std::ostringstream::binary); // FIXME we need to check the whether the current device ID match the binary file's. 
@@ -203,20 +211,66 @@ namespace gbe { return reinterpret_cast(program); } - static size_t genProgramSerializeToBinary(gbe_program program, char **binary) { + static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string binary_content; +//the first byte stands for binary_type. +binary_content.assign(binary+1, size-1); +llvm::StringRef llvm_bin_str(binary_content); +llvm::LLVMContext& c = llvm::getGlobalContext(); +llvm::SMDiagnostic Err; +llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str"); +acquireLLVMContextLock(); +llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c); +releaseLLVMContextLock(); +if(module == NULL){ + GBE_ASSERT(0); +} + +GenProgram *program = GBE_NEW(GenProgram, deviceID, module); + +//program->printStatus(0, std::cout); +return reinterpret_cast(program); +#else + return NULL; +#endif + } + + static size_t genProgramSerializeToBinary(gbe_program program, char **binary, int binary_type) { using namespace gbe; size_t sz; std::ostringstream oss; GenProgram *prog = (GenProgram*)program; -if ((sz = prog->serializeToBin(oss)) == 0) { - *binary = 0; +//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM bitcode library +if(binary_type == 0){ + if ((sz = prog->serializeToBin(oss)) == 0) { +*binary = NULL; +return 0; + } + + //add header to differetiate from llvm bitcode binary. + //the header length is 5 bytes: 1 binary type, 4 bitcode header. 
+ *binary = (char *)malloc(sizeof(char) * (sz+5) ); + memset(*binary, 0, sizeof(char) * (sz+5) ); + memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); + return sz+5; +}else{ +#ifdef GBE_COMPILER_AVAILABLE + std::string str; + llvm::raw_string_ostream OS(str); + llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS); + std::string& bin_str = OS.str(); + int llsz = bin_str.size(); + *binary = (char *)malloc(sizeof(char) * (llsz+1) ); + *(*binary) = binary_type; + memcpy(*binary+1, bin_str.c_str(), llsz); + return llsz+1; +#else return 0; +#endif } - -*binary = (char *)malloc(sizeof(char) * sz); -memcpy(*binary, oss.str().c_str(), sz*sizeof(char)); -return sz; } static gbe_program genProgramNewFromLLVM(uint32_t deviceID, @@ -337,6 +391,7 @@ namespace gbe { void genSetupCallBacks(void) { gbe_program_new_from_binary = gbe::genProgramNewFromBinary; + gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary; gb
[Beignet] [PATCH v7] add binary type support for compiled object and library.
From: Luo save the llvm bitcode to program->binary: insert a byte in front of the bitcode stands for binary type(0 means GEN binary, 1 means COMPILED_OBJECT, 2 means LIBRARY); load the binary to module by ParseIR. create random directory to save compile header files. use strncpy and strncat to replace strcpy and strcat. v6: fix enqueue_copy_fill bug, use '\0' instead of 0 in the header. v7 binary header format issue: fix test_load_program_from_bin bug of standalone kernel generated by gbe_bin_generater. Signed-off-by: Luo --- backend/src/backend/gen_program.cpp | 71 +++- backend/src/backend/program.cpp | 1 + backend/src/backend/program.h | 8 +++-- backend/src/gbe_bin_generater.cpp | 58 -- src/cl_api.c| 25 +++-- src/cl_gbe_loader.cpp | 11 -- src/cl_gbe_loader.h | 10 +++--- src/cl_program.c| 72 + src/cl_program.h| 1 + 9 files changed, 211 insertions(+), 46 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 300741e..8897dbb 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -35,6 +35,12 @@ #include "llvm/Linker.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/IRReader/IRReader.h" #include "backend/program.h" #include "backend/gen_program.h" @@ -55,6 +61,7 @@ #include #include #include +#include namespace gbe { @@ -188,7 +195,8 @@ namespace gbe { static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) { using namespace gbe; std::string binary_content; -binary_content.assign(binary, size); +//the first 5 bytes are header to differentiate from llvm bitcode binary. 
+binary_content.assign(binary+5, size-5); GenProgram *program = GBE_NEW(GenProgram, deviceID); std::istringstream ifs(binary_content, std::ostringstream::binary); // FIXME we need to check the whether the current device ID match the binary file's. @@ -203,20 +211,66 @@ namespace gbe { return reinterpret_cast(program); } - static size_t genProgramSerializeToBinary(gbe_program program, char **binary) { + static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) { +#ifdef GBE_COMPILER_AVAILABLE +using namespace gbe; +std::string binary_content; +//the first byte stands for binary_type. +binary_content.assign(binary+1, size-1); +llvm::StringRef llvm_bin_str(binary_content); +llvm::LLVMContext& c = llvm::getGlobalContext(); +llvm::SMDiagnostic Err; +llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str"); +acquireLLVMContextLock(); +llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c); +releaseLLVMContextLock(); +if(module == NULL){ + GBE_ASSERT(0); +} + +GenProgram *program = GBE_NEW(GenProgram, deviceID, module); + +//program->printStatus(0, std::cout); +return reinterpret_cast(program); +#else + return NULL; +#endif + } + + static size_t genProgramSerializeToBinary(gbe_program program, char **binary, int binary_type) { using namespace gbe; size_t sz; std::ostringstream oss; GenProgram *prog = (GenProgram*)program; -if ((sz = prog->serializeToBin(oss)) == 0) { - *binary = 0; +//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM bitcode library +if(binary_type == 0){ + if ((sz = prog->serializeToBin(oss)) == 0) { +*binary = NULL; +return 0; + } + + //add header to differetiate from llvm bitcode binary. + //the header length is 5 bytes: 1 binary type, 4 bitcode header. 
+ *binary = (char *)malloc(sizeof(char) * (sz+5) ); + memset(*binary, 0, sizeof(char) * (sz+5) ); + memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); + return sz+5; +}else{ +#ifdef GBE_COMPILER_AVAILABLE + std::string str; + llvm::raw_string_ostream OS(str); + llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS); + std::string& bin_str = OS.str(); + int llsz = bin_str.size(); + *binary = (char *)malloc(sizeof(char) * (llsz+1) ); + *(*binary) = binary_type; + memcpy(*binary+1, bin_str.c_str(), llsz); + return llsz+1; +#else return 0; +#endif } - -*binary = (char *)malloc(sizeof(char) * sz); -memcpy(*binary, oss.str().c_str(), sz*sizeof(char)); -return sz; } static gbe_program genProgramNewFromLLVM(uint32_t deviceID, @@ -337,6 +391,7 @@ namespace gbe { void genSetupCallBacks(void) {
[Beignet] [PATCH] [opencl-1.2] implement API clEnqueueFillImage.
From: Luo enqueues a command to fill an image object with a specified color. fix typo cl_context_get_static_kernel_from_bin. Signed-off-by: Luo --- src/CMakeLists.txt | 4 +- src/cl_api.c | 73 + src/cl_context.c | 2 +- src/cl_context.h | 7 +- src/cl_enqueue.c | 1 + src/cl_enqueue.h | 1 + src/cl_gt_device.h | 7 +- src/cl_khr_icd.c | 2 +- src/cl_mem.c | 106 - src/cl_mem.h | 3 + src/kernels/cl_internal_fill_image_1d.cl | 14 src/kernels/cl_internal_fill_image_1d_array.cl | 15 src/kernels/cl_internal_fill_image_2d.cl | 15 src/kernels/cl_internal_fill_image_2d_array.cl | 16 src/kernels/cl_internal_fill_image_3d.cl | 16 15 files changed, 257 insertions(+), 25 deletions(-) create mode 100644 src/kernels/cl_internal_fill_image_1d.cl create mode 100644 src/kernels/cl_internal_fill_image_1d_array.cl create mode 100644 src/kernels/cl_internal_fill_image_2d.cl create mode 100644 src/kernels/cl_internal_fill_image_2d_array.cl create mode 100644 src/kernels/cl_internal_fill_image_3d.cl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8651af6..3d5ce4d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -47,7 +47,9 @@ cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign -cl_internal_fill_buf_align128) +cl_internal_fill_buf_align128 cl_internal_fill_image_1d +cl_internal_fill_image_1d_array cl_internal_fill_image_2d +cl_internal_fill_image_2d_array cl_internal_fill_image_3d) set (BUILT_IN_NAME cl_internal_built_in_kernel) MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") diff --git a/src/cl_api.c b/src/cl_api.c index 32f91d7..c93957f 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -1812,6 +1812,79 @@ error: } cl_int +clEnqueueFillImage(cl_command_queue 
command_queue, + cl_mem image, + const void * fill_color, + const size_t * porigin, + const size_t * pregion, + cl_uintnum_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) +{ + cl_int err = CL_SUCCESS; + enqueue_data *data, no_wait_data = { 0 }; + + CHECK_QUEUE(command_queue); + CHECK_IMAGE(image, src_image); + FIXUP_IMAGE_REGION(src_image, pregion, region); + FIXUP_IMAGE_ORIGIN(src_image, porigin, origin); + + if (command_queue->ctx != image->ctx) { +err = CL_INVALID_CONTEXT; +goto error; + } + + if (fill_color == NULL) { +err = CL_INVALID_VALUE; +goto error; + } + + if (!origin || !region || origin[0] + region[0] > src_image->w || origin[1] + region[1] > src_image->h || origin[2] + region[2] > src_image->depth) { + err = CL_INVALID_VALUE; + goto error; + } + + if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (origin[2] != 0 || region[2] != 1)){ +err = CL_INVALID_VALUE; +goto error; + } + + if (src_image->image_type == CL_MEM_OBJECT_IMAGE1D && (origin[2] != 0 ||origin[1] != 0 || region[2] != 1 || region[1] != 1)){ +err = CL_INVALID_VALUE; +goto error; + } + + err = cl_image_fill(command_queue, fill_color, src_image, origin, region); + if (err) { +goto error; + } + + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx); + + data = &no_wait_data; + data->type = EnqueueFillImage; + data->queue = command_queue; + + if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, + event, data, CL_COMMAND_FILL_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { +if (event && (*event)->type != CL_COMMAND_USER +&& (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { + cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); +} + +err = cl_command_queue_flush(command_queue); + } + + if(b_output_kernel_perf) +time_end(command_queue->ctx, "beignet internal kernel : cl_fill_image", "", command_queue); + + return 0; + + error: + return err; +} + +cl_int clEnqueueFillBuffer(cl_command_queue command_queue, 
cl_mem buffer, const void * pattern, diff --git a/src/cl_context.c b/src/cl_context.c index 8f42a58..152faf3 100644 --- a/src/cl_context.c +++ b/src/cl_context.c @@ -319,7 +319,7 @@ cl_contex
[Beignet] [PATCH] add cpu copy for 1Darray and 2darray related copy APIs.
From: Luo detail cases: 1Darray, 2Darray, 2Darrayto2D, 2Darrayto3D, 2Dto2Darray, 3Dto2Darray. 1d used gpu copy. Signed-off-by: Luo --- src/CMakeLists.txt | 4 +- src/cl_context.h | 1 + src/cl_mem.c | 73 +- src/cl_mem.h | 4 ++ src/kernels/cl_internal_copy_image_1d_to_1d.cl | 19 +++ 5 files changed, 97 insertions(+), 4 deletions(-) create mode 100644 src/kernels/cl_internal_copy_image_1d_to_1d.cl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8651af6..82b6df0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -41,8 +41,8 @@ set (KERNEL_STR_FILES) set (KERNEL_NAMES cl_internal_copy_buf_align4 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset -cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d -cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d +cl_internal_copy_buf_rect cl_internal_copy_image_1d_to_1d cl_internal_copy_image_2d_to_2d +cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 diff --git a/src/cl_context.h b/src/cl_context.h index cba0a0a..74e31c7 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -47,6 +47,7 @@ enum _cl_internal_ker_type { CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, CL_ENQUEUE_COPY_BUFFER_RECT, + CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, //copy image 1d to image 1d CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d diff --git a/src/cl_mem.c b/src/cl_mem.c index e0c4ec9..8bb7215 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -542,6 +542,38 @@ 
cl_mem_copy_image_region(const size_t *origin, const size_t *region, } } +void +cl_mem_copy_image_to_image(const size_t *dst_origin,const size_t *src_origin, const size_t *region, + const struct _cl_mem_image *dst_image, const struct _cl_mem_image *src_image) +{ + //printf("origin:%u,%u,%u to %u,%u,%u\n", src_origin[0],src_origin[1], src_origin[2], dst_origin[0],dst_origin[1], dst_origin[2]); + //printf("region:%u,%u,%u \n", region[0],region[1], region[2]); + //printf("pitch:%u,%u to %u,%u\n", src_image->row_pitch, src_image->slice_pitch,dst_image->row_pitch, dst_image->slice_pitch); + + char* dst= cl_mem_map_auto((cl_mem)dst_image); + char* src= cl_mem_map_auto((cl_mem)src_image); + size_t dst_offset = dst_image->bpp * dst_origin[0] + dst_image->row_pitch * dst_origin[1] + dst_image->slice_pitch * dst_origin[2]; + size_t src_offset = src_image->bpp * src_origin[0] + src_image->row_pitch * src_origin[1] + src_image->slice_pitch * src_origin[2]; + dst= (char*)dst+ dst_offset; + src= (char*)src+ src_offset; + cl_uint y, z; + for (z = 0; z < region[2]; z++) { +const char* src_ptr = src; +char* dst_ptr = dst; +for (y = 0; y < region[1]; y++) { + memcpy(dst_ptr, src_ptr, src_image->bpp*region[0]); + src_ptr += src_image->row_pitch; + dst_ptr += dst_image->row_pitch; +} +src = (char*)src + src_image->slice_pitch; +dst = (char*)dst + dst_image->slice_pitch; + } + + cl_mem_unmap_auto((cl_mem)src_image); + cl_mem_unmap_auto((cl_mem)dst_image); + +} + static void cl_mem_copy_image(struct _cl_mem_image *image, size_t row_pitch, @@ -1377,7 +1409,16 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image assert(src_image->base.ctx == dst_image->base.ctx); /* setup the kernel and run. 
*/ - if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) { + if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D) { +if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D) { + extern char cl_internal_copy_image_1d_to_1d_str[]; + extern int cl_internal_copy_image_1d_to_1d_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, + cl_internal_copy_image_1d_to_1d_str, (size_t)cl_internal_copy_image_1d_to_1d_str_size, NULL); +} + + }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { extern char cl_internal_copy_image_2d_to_2d_str[]; extern int cl_internal_copy_image_2d_to_2d_str_size; @@ -1390,8 +1431,33 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAG
[Beignet] [PATCH] fix enqueue_built_in_kernels bug.
From: Luo need to assign the length to a local variable. Signed-off-by: Luo --- src/cl_gt_device.h | 1 + src/cl_program.c | 8 +--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index d7855cd..63c9047 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -87,6 +87,7 @@ DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;" "__cl_copy_region_unalign_dst_offset;" "__cl_copy_region_unalign_src_offset;" "__cl_copy_buffer_rect;" + "__cl_copy_image_1d_to_1d;" "__cl_copy_image_2d_to_2d;" "__cl_copy_image_3d_to_2d;" "__cl_copy_image_2d_to_3d;" diff --git a/src/cl_program.c b/src/cl_program.c index 7888a8f..13867e0 100644 --- a/src/cl_program.c +++ b/src/cl_program.c @@ -213,12 +213,14 @@ cl_program_create_from_binary(cl_context ctx, goto error; } + int length = (int)lengths[0]; + program = cl_program_new(ctx); // TODO: Need to check the binary format here to return CL_INVALID_BINARY. - TRY_ALLOC(program->binary, cl_calloc(lengths[0], sizeof(char))); - memcpy(program->binary, binaries[0], lengths[0]); - program->binary_sz = lengths[0]; + TRY_ALLOC(program->binary, cl_calloc(length, sizeof(char))); + memcpy(program->binary, binaries[0], length); + program->binary_sz = length; program->source_type = FROM_BINARY; if(isBitcode((unsigned char*)program->binary+1, (unsigned char*)program->binary+program->binary_sz)) { -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH V2] fix enqueue_built_in_kernels bug. add image_1d_to_1d builtin kernel name.
From: Luo need to assign the length to a local variable. v2: the bug was a false alarm; the case passed after regenerating the cmake folder. Signed-off-by: Luo --- src/cl_gt_device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index d7855cd..63c9047 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -87,6 +87,7 @@ DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;" "__cl_copy_region_unalign_dst_offset;" "__cl_copy_region_unalign_src_offset;" "__cl_copy_buffer_rect;" + "__cl_copy_image_1d_to_1d;" "__cl_copy_image_2d_to_2d;" "__cl_copy_image_3d_to_2d;" "__cl_copy_image_2d_to_3d;" -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] add the usage of link program from llvm binary.
From: Luo User A can compile and link kernel source to an llvm binary first, then query the binary and save it to a file; with the binary, user B can call clCreateProgramWithBinary without compiling the source again. This usage helps those who need to protect the kernel source. Signed-off-by: Luo --- utests/runtime_compile_link.cpp | 41 ++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp index 8aeea31..f27b1dd 100644 --- a/utests/runtime_compile_link.cpp +++ b/utests/runtime_compile_link.cpp @@ -67,12 +67,47 @@ void runtime_compile_link(void) OCL_ASSERT(err==CL_SUCCESS); cl_program input_programs[2] = { program_A, program_B}; - cl_program linked_program = clLinkProgram(ctx, 0, NULL, NULL, 2, input_programs, NULL, NULL, &err); - + cl_program linked_program = clLinkProgram(ctx, 0, NULL, "-create-library", 2, input_programs, NULL, NULL, &err); OCL_ASSERT(linked_program != NULL); OCL_ASSERT(err == CL_SUCCESS); + size_t binarySize; + unsigned char *binary; + + // Get the size of the resulting binary (only one device) + err= clGetProgramInfo( linked_program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); + OCL_ASSERT(err==CL_SUCCESS); + + // Create a buffer and get the actual binary + binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); + if (binary == NULL) { +OCL_ASSERT(0); +return ; + } + + unsigned char *buffers[ 1 ] = { binary }; + // Do another sanity check here first + size_t size; + cl_int loadErrors[ 1 ]; + err = clGetProgramInfo( linked_program, CL_PROGRAM_BINARIES, 0, NULL, &size ); + OCL_ASSERT(err==CL_SUCCESS); + if( size != sizeof( buffers ) ){ +free(binary); +return ; + } + + err = clGetProgramInfo( linked_program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); + OCL_ASSERT(err==CL_SUCCESS); + + cl_device_id deviceID; + err = clGetProgramInfo( linked_program, CL_PROGRAM_DEVICES, sizeof( deviceID), &deviceID, NULL ); + 
OCL_ASSERT(err==CL_SUCCESS); + + cl_program program_with_binary = clCreateProgramWithBinary(ctx, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &err); + OCL_ASSERT(err==CL_SUCCESS); + cl_program my_newly_linked_program = clLinkProgram(ctx, 1, &deviceID, NULL, 1, &program_with_binary, NULL, NULL, &err); + OCL_ASSERT(err==CL_SUCCESS); // link success, run this kernel. const size_t n = 16; @@ -104,7 +139,7 @@ void runtime_compile_link(void) OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); - kernel = clCreateKernel(linked_program, "runtime_compile_link_a", &err); + kernel = clCreateKernel(my_newly_linked_program, "runtime_compile_link_a", &err); OCL_ASSERT(err == CL_SUCCESS); -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] remove lspci, gbe_bin_generater would generate llvm binary by default.
From: Luo driver can get chipset id by ioctl instead of calling lspci in cmake; user could generator gen binary by configuring cmake option -DGEN_PCI_ID= or calling the gbe_bin_generater with option -t GEN_PCI_ID. Signed-off-by: Luo --- backend/src/gbe_bin_generater.cpp | 70 - src/CMakeLists.txt | 32 +++-- src/GetGenID.sh | 26 -- src/cl_program.c| 22 +--- utests/CMakeLists.txt | 17 ++--- utests/enqueue_built_in_kernels.cpp | 1 - 6 files changed, 89 insertions(+), 79 deletions(-) delete mode 100755 src/GetGenID.sh diff --git a/backend/src/gbe_bin_generater.cpp b/backend/src/gbe_bin_generater.cpp index 925ba93..17e8a7e 100644 --- a/backend/src/gbe_bin_generater.cpp +++ b/backend/src/gbe_bin_generater.cpp @@ -156,19 +156,21 @@ void program_build_instance::serialize_program(void) throw(int) { ofstream ofs; ostringstream oss; -size_t sz, header_sz = 0; +size_t sz = 0, header_sz = 0; ofs.open(bin_path, ofstream::out | ofstream::trunc | ofstream::binary); -//add header to differeciate from llvm bitcode binary. -// (5 bytes: 1 byte for binary type, 4 byte for bc code.) -char header = '\0'; - if (str_fmt_out) { - OUTS_UPDATE_SZ(header); - OUTS_UPDATE_SZ(header); - OUTS_UPDATE_SZ(header); - OUTS_UPDATE_SZ(header); - OUTS_UPDATE_SZ(header); + + if(gen_pci_id){ +//add header to differeciate from llvm bitcode binary. +// (5 bytes: 1 byte for binary type, 4 byte for bc code.) 
+char header = '\0'; +OUTS_UPDATE_SZ(header); +OUTS_UPDATE_SZ(header); +OUTS_UPDATE_SZ(header); +OUTS_UPDATE_SZ(header); +OUTS_UPDATE_SZ(header); + } string array_name = "Unkown_name_array"; unsigned long last_slash = bin_path.rfind("/"); @@ -180,9 +182,15 @@ void program_build_instance::serialize_program(void) throw(int) ofs << "#include " << "\n"; ofs << "char " << array_name << "[] = {" << "\n"; - sz = gbe_prog->serializeToBin(oss); - - sz+=5; + if(gen_pci_id){ +sz = gbe_prog->serializeToBin(oss); +sz+= header_sz; + }else{ +char *llvm_binary; +size_t bin_length = gbe_program_serialize_to_binary((gbe_program)gbe_prog, &llvm_binary, 1); +oss.write(llvm_binary, bin_length); +sz += bin_length; + } for (size_t i = 0; i < sz; i++) { unsigned char c = oss.str().c_str()[i]; @@ -191,18 +199,27 @@ void program_build_instance::serialize_program(void) throw(int) ofs << "0x"; ofs << asic_str << ((i == sz - 1) ? "" : ", "); } - ofs << "};\n"; string array_size = array_name + "_size"; ofs << "size_t " << array_size << " = " << sz << ";" << "\n"; } else { - OUTF_UPDATE_SZ(header); - OUTF_UPDATE_SZ(header); - OUTF_UPDATE_SZ(header); - OUTF_UPDATE_SZ(header); - OUTF_UPDATE_SZ(header); - sz = gbe_prog->serializeToBin(ofs); + if(gen_pci_id){ +//add header to differeciate from llvm bitcode binary. +// (5 bytes: 1 byte for binary type, 4 byte for bc code.) +char header = '\0'; +OUTF_UPDATE_SZ(header); +OUTF_UPDATE_SZ(header); +OUTF_UPDATE_SZ(header); +OUTF_UPDATE_SZ(header); +OUTF_UPDATE_SZ(header); +sz = gbe_prog->serializeToBin(ofs); + }else{ +char *llvm_binary; +size_t bin_length = gbe_program_serialize_to_binary((gbe_program)gbe_prog, &llvm_binary, 1); +ofs.write(llvm_binary, bin_length); +sz+=bin_length; + } } ofs.close(); @@ -215,15 +232,20 @@ void program_build_instance::serialize_program(void) throw(int) void program_build_instance::build_program(void) throw(int) { -// FIXME, we need to find a graceful way to generate internal binaries for difference -// devices. 
-gbe_program opaque = gbe_program_new_from_source(gen_pci_id, code, 0, build_opt.c_str(), NULL, NULL); +gbe_program opaque = NULL; +if(gen_pci_id){ + opaque = gbe_program_new_from_source(gen_pci_id, code, 0, build_opt.c_str(), NULL, NULL); +}else{ + opaque = gbe_program_compile_from_source(0, code, NULL, 0, build_opt.c_str(), NULL, NULL); +} if (!opaque) throw FILE_BUILD_FAILED; gbe_prog = reinterpret_cast(opaque); -assert(gbe_program_get_kernel_num(opaque)); +if(gen_pci_id){ + assert(gbe_program_get_kernel_num(opaque)); +} } const char* program_build_instance::file_map_open(void) throw(int) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9b41932..45c83d4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,30 +5,24 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../include ${MESA_S
[Beignet] [PATCH v2] remove lspci, gbe_bin_generater would generate llvm binary by default.
From: Luo driver can get chipset id by ioctl instead of calling lspci in cmake; user could generator gen binary by configuring cmake option -DGEN_PCI_ID= or calling the gbe_bin_generater with option -t GEN_PCI_ID. v2: add "\0GENC" magic code for gen binary, fix typo. Signed-off-by: Luo --- backend/src/gbe_bin_generater.cpp | 72 - src/CMakeLists.txt | 32 +++-- src/GetGenID.sh | 26 -- src/cl_program.c| 22 +--- utests/CMakeLists.txt | 17 ++--- utests/enqueue_built_in_kernels.cpp | 1 - 6 files changed, 90 insertions(+), 80 deletions(-) delete mode 100755 src/GetGenID.sh diff --git a/backend/src/gbe_bin_generater.cpp b/backend/src/gbe_bin_generater.cpp index 925ba93..d9ae946 100644 --- a/backend/src/gbe_bin_generater.cpp +++ b/backend/src/gbe_bin_generater.cpp @@ -156,21 +156,23 @@ void program_build_instance::serialize_program(void) throw(int) { ofstream ofs; ostringstream oss; -size_t sz, header_sz = 0; +size_t sz = 0, header_sz = 0; ofs.open(bin_path, ofstream::out | ofstream::trunc | ofstream::binary); -//add header to differeciate from llvm bitcode binary. -// (5 bytes: 1 byte for binary type, 4 byte for bc code.) -char header = '\0'; - if (str_fmt_out) { - OUTS_UPDATE_SZ(header); - OUTS_UPDATE_SZ(header); - OUTS_UPDATE_SZ(header); - OUTS_UPDATE_SZ(header); - OUTS_UPDATE_SZ(header); - string array_name = "Unkown_name_array"; + if(gen_pci_id){ +//add header to differeciate from llvm bitcode binary. +// (5 bytes: 1 byte for binary type, 4 byte for bc code, 'GENC' is for gen binary.) 
+char gen_header[6] = "\0GENC"; +OUTS_UPDATE_SZ(gen_header[0]); +OUTS_UPDATE_SZ(gen_header[1]); +OUTS_UPDATE_SZ(gen_header[2]); +OUTS_UPDATE_SZ(gen_header[3]); +OUTS_UPDATE_SZ(gen_header[4]); + } + + string array_name = "Unknown_name_array"; unsigned long last_slash = bin_path.rfind("/"); unsigned long last_dot = bin_path.rfind("."); @@ -180,9 +182,15 @@ void program_build_instance::serialize_program(void) throw(int) ofs << "#include " << "\n"; ofs << "char " << array_name << "[] = {" << "\n"; - sz = gbe_prog->serializeToBin(oss); - - sz+=5; + if(gen_pci_id){ +sz = gbe_prog->serializeToBin(oss); +sz += header_sz; + }else{ +char *llvm_binary; +size_t bin_length = gbe_program_serialize_to_binary((gbe_program)gbe_prog, &llvm_binary, 1); +oss.write(llvm_binary, bin_length); +sz += bin_length; + } for (size_t i = 0; i < sz; i++) { unsigned char c = oss.str().c_str()[i]; @@ -191,18 +199,27 @@ void program_build_instance::serialize_program(void) throw(int) ofs << "0x"; ofs << asic_str << ((i == sz - 1) ? "" : ", "); } - ofs << "};\n"; string array_size = array_name + "_size"; ofs << "size_t " << array_size << " = " << sz << ";" << "\n"; } else { - OUTF_UPDATE_SZ(header); - OUTF_UPDATE_SZ(header); - OUTF_UPDATE_SZ(header); - OUTF_UPDATE_SZ(header); - OUTF_UPDATE_SZ(header); - sz = gbe_prog->serializeToBin(ofs); + if(gen_pci_id){ +//add header to differeciate from llvm bitcode binary. +// (5 bytes: 1 byte for binary type, 4 byte for bc code, 'GENC' is for gen binary.) 
+char gen_header[6] = "\0GENC"; +OUTF_UPDATE_SZ(gen_header[0]); +OUTF_UPDATE_SZ(gen_header[1]); +OUTF_UPDATE_SZ(gen_header[2]); +OUTF_UPDATE_SZ(gen_header[3]); +OUTF_UPDATE_SZ(gen_header[4]); +sz = gbe_prog->serializeToBin(ofs); + }else{ +char *llvm_binary; +size_t bin_length = gbe_program_serialize_to_binary((gbe_program)gbe_prog, &llvm_binary, 1); +ofs.write(llvm_binary, bin_length); +sz+=bin_length; + } } ofs.close(); @@ -215,15 +232,20 @@ void program_build_instance::serialize_program(void) throw(int) void program_build_instance::build_program(void) throw(int) { -// FIXME, we need to find a graceful way to generate internal binaries for difference -// devices. -gbe_program opaque = gbe_program_new_from_source(gen_pci_id, code, 0, build_opt.c_str(), NULL, NULL); +gbe_program opaque = NULL; +if(gen_pci_id){ + opaque = gbe_program_new_from_source(gen_pci_id, code, 0, build_opt.c_str(), NULL, NULL); +}else{ + opaque = gbe_program_compile_from_source(0, code, NULL, 0, build_opt.c_str(), NULL, NULL); +} if (!opaque) throw FILE_BUILD_FAILED; gbe_prog = reinterpret_cast(opaque); -assert(gbe_program_get_kernel_num(opaque)); +if(gen_pci_id){ + assert(gbe_program_get_kernel_num(opaque)); +} } const char* program_build_instance::file_map_open
[Beignet] [PATCH 2/2] add utest load_program_from_gen_bin.
From: LuoXionghu this test case would check whether genProgramSerializeToBinary in backend can generator gen binary correctly. rename load_program_from_bin to load_program_from_bin_file. the difference is load_program_from_bin_file could either load program from llvm binary or gen binary file generated by gbe_bin_generator. Signed-off-by: LuoXionghu --- utests/CMakeLists.txt | 3 +- utests/load_program_from_bin.cpp | 77 - utests/load_program_from_bin_file.cpp | 77 + utests/load_program_from_gen_bin.cpp | 93 +++ 4 files changed, 172 insertions(+), 78 deletions(-) delete mode 100644 utests/load_program_from_bin.cpp create mode 100644 utests/load_program_from_bin_file.cpp create mode 100644 utests/load_program_from_gen_bin.cpp diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 3614c57..561744d 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -171,7 +171,8 @@ set (utests_sources compiler_simd_any.cpp compiler_simd_all.cpp compiler_double_precision.cpp - load_program_from_bin.cpp + load_program_from_bin_file.cpp + load_program_from_gen_bin.cpp get_arg_info.cpp profiling_exec.cpp enqueue_copy_buf.cpp diff --git a/utests/load_program_from_bin.cpp b/utests/load_program_from_bin.cpp deleted file mode 100644 index d45c2bd..000 --- a/utests/load_program_from_bin.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "utest_helper.hpp" -#include "utest_file_map.hpp" -#include -#include - -using namespace std; - -static void cpu(int global_id, float *src, float *dst) { -dst[global_id] = ceilf(src[global_id]); -} - -static void test_load_program_from_bin(void) -{ -const size_t n = 16; -float cpu_dst[16], cpu_src[16]; -cl_int status; -cl_int binary_status; -char *ker_path = NULL; - -cl_file_map_t *fm = cl_file_map_new(); -ker_path = cl_do_kiss_path("compiler_ceil.bin", device); -OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS); - -const unsigned char *src = (const unsigned char *)cl_file_map_begin(fm); -const size_t sz = cl_file_map_size(fm); 
- -program = clCreateProgramWithBinary(ctx, 1, - &device, &sz, &src, &binary_status, &status); - -OCL_ASSERT(program && status == CL_SUCCESS); - -/* OCL requires to build the program even if it is created from a binary */ -OCL_ASSERT(clBuildProgram(program, 1, &device, NULL, NULL, NULL) == CL_SUCCESS); - -kernel = clCreateKernel(program, "compiler_ceil", &status); -OCL_ASSERT(status == CL_SUCCESS); - -OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); -OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); -OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); -OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); -globals[0] = 16; -locals[0] = 16; - -// Run random tests -for (uint32_t pass = 0; pass < 8; ++pass) { -OCL_MAP_BUFFER(0); -for (int32_t i = 0; i < (int32_t) n; ++i) -cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; -OCL_UNMAP_BUFFER(0); - -// Run the kernel on GPU -OCL_NDRANGE(1); - -// Run on CPU -for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); - -// Compare -OCL_MAP_BUFFER(1); - -#if 0 -printf(" GPU:\n"); -for (int32_t i = 0; i < (int32_t) n; ++i) -printf(" %f", ((float *)buf_data[1])[i]); -printf("\n CPU:\n"); -for (int32_t i = 0; i < (int32_t) n; ++i) -printf(" %f", cpu_dst[i]); -printf("\n"); -#endif - -for (int32_t i = 0; i < (int32_t) n; ++i) -OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]); -OCL_UNMAP_BUFFER(1); -} -} - -MAKE_UTEST_FROM_FUNCTION(test_load_program_from_bin); diff --git a/utests/load_program_from_bin_file.cpp b/utests/load_program_from_bin_file.cpp new file mode 100644 index 000..feefacc --- /dev/null +++ b/utests/load_program_from_bin_file.cpp @@ -0,0 +1,77 @@ +#include "utest_helper.hpp" +#include "utest_file_map.hpp" +#include +#include + +using namespace std; + +static void cpu(int global_id, float *src, float *dst) { +dst[global_id] = ceilf(src[global_id]); +} + +static void test_load_program_from_bin_file(void) +{ +const size_t n = 16; +float cpu_dst[16], cpu_src[16]; +cl_int status; +cl_int 
binary_status; +char *ker_path = NULL; + +cl_file_map_t *fm = cl_file_map_new(); +ker_path = cl_do_kiss_path("compiler_ceil.bin", device); +OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS); + +const unsigned char *src = (const unsigned char *)cl_file_map_begin(fm); +const size_t sz = cl_file_map_size(fm); + +program = clCreateProgramWithBinary(ctx, 1, + &device, &sz, &src, &binary_status, &status); + +OCL_ASSERT(program && status == CL_SUCCESS); + +/* OCL requi
[Beignet] [PATCH 0/2] gen binary with pci info.
From: LuoXionghu This patchset depends on the patch "remove lspci, gbe_bin_generater would generate llvm binary by default." LuoXionghu (2): add pci info in the gen binary code. add utest load_program_from_gen_bin. backend/src/backend/gen_program.cpp | 34 + backend/src/gbe_bin_generater.cpp | 2 + utests/CMakeLists.txt | 3 +- utests/load_program_from_bin.cpp | 77 - utests/load_program_from_bin_file.cpp | 77 + utests/load_program_from_gen_bin.cpp | 93 +++ 6 files changed, 199 insertions(+), 87 deletions(-) delete mode 100644 utests/load_program_from_bin.cpp create mode 100644 utests/load_program_from_bin_file.cpp create mode 100644 utests/load_program_from_gen_bin.cpp -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 1/2] add pci info in the gen binary code.
From: LuoXionghu The pci info is 4 bytes long and is stored right after the '\0GENC' magic header. Check the header magic number and the pci info before deserializeFromBin. Signed-off-by: LuoXionghu --- backend/src/backend/gen_program.cpp | 34 +- backend/src/gbe_bin_generater.cpp | 2 ++ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 84e8c2a..fc50344 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -196,15 +196,29 @@ namespace gbe { #endif } +#define IS_GEN_BINARY(binary) (*binary == '\0' && *(binary+1) == 'G'&& *(binary+2) == 'E' &&*(binary+3) == 'N' &&*(binary+4) == 'C') +#define BINARY_MATCH(typeA, typeB) ((IS_IVYBRIDGE(typeA) && IS_IVYBRIDGE(typeB)) || (IS_HASWELL(typeA) && IS_HASWELL(typeB)) ) +#define FILL_GEN_BINARY(binary) do{*binary = '\0'; *(binary+1) = 'G'; *(binary+2) = 'E'; *(binary+3) = 'N'; *(binary+4) = 'C';}while(0) +#define FILL_DEVICE_ID(binary, devID) do {*(binary+5) = devID; *(binary+6) = devID >>8; *(binary+7) = devID >>16; *(binary+8) = devID >>24;}while(0) + static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) { using namespace gbe; std::string binary_content; -//the first 5 bytes are header to differentiate from llvm bitcode binary. -binary_content.assign(binary+5, size-5); +//the header length is 9 bytes: 1 byte is binary type, 4 bytes are bitcode header, 4 bytes are device id info. +uint32_t bin_deviceID = *(binary+5)|*(binary+6)<<8|*(binary+7)<<16|*(binary+8)<<24; + +// check whether is gen binary ('/0GENC') +if(!IS_GEN_BINARY(binary)){ +return NULL; +} +// check the whether the current device ID match the binary file's. 
+if(!BINARY_MATCH(bin_deviceID, deviceID)){ + return NULL; +} + +binary_content.assign(binary+9, size-9); GenProgram *program = GBE_NEW(GenProgram, deviceID); std::istringstream ifs(binary_content, std::ostringstream::binary); -// FIXME we need to check the whether the current device ID match the binary file's. -deviceID = deviceID; if (!program->deserializeFromBin(ifs)) { delete program; @@ -255,11 +269,13 @@ namespace gbe { } //add header to differetiate from llvm bitcode binary. - //the header length is 5 bytes: 1 binary type, 4 bitcode header. - *binary = (char *)malloc(sizeof(char) * (sz+5) ); - memset(*binary, 0, sizeof(char) * (sz+5) ); - memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); - return sz+5; + //the header length is 9 bytes: 1 byte is binary type, 4 bytes are bitcode header, 4 bytes are device id info. + *binary = (char *)malloc(sizeof(char) * (sz+9) ); + memset(*binary, 0, sizeof(char) * (sz+9) ); + FILL_GEN_BINARY(*binary); + FILL_DEVICE_ID(*binary, prog->deviceID); + memcpy(*binary+9, oss.str().c_str(), sz*sizeof(char)); + return sz+9; }else{ #ifdef GBE_COMPILER_AVAILABLE std::string str; diff --git a/backend/src/gbe_bin_generater.cpp b/backend/src/gbe_bin_generater.cpp index d9ae946..e7a5b97 100644 --- a/backend/src/gbe_bin_generater.cpp +++ b/backend/src/gbe_bin_generater.cpp @@ -170,6 +170,7 @@ void program_build_instance::serialize_program(void) throw(int) OUTS_UPDATE_SZ(gen_header[2]); OUTS_UPDATE_SZ(gen_header[3]); OUTS_UPDATE_SZ(gen_header[4]); +OUTS_UPDATE_SZ(gen_pci_id); } string array_name = "Unknown_name_array"; @@ -213,6 +214,7 @@ void program_build_instance::serialize_program(void) throw(int) OUTF_UPDATE_SZ(gen_header[2]); OUTF_UPDATE_SZ(gen_header[3]); OUTF_UPDATE_SZ(gen_header[4]); +OUTF_UPDATE_SZ(gen_pci_id); sz = gbe_prog->serializeToBin(ofs); }else{ char *llvm_binary; -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [Patch V3 1/2] add platform info in the gen binary code.
From: LuoXionghu The platform info is 3 bytes long and is stored right after the '\0GENC' magic header. Check the header magic number and the platform info before deserializeFromBin. v2: support IVB/BYT/HSW binaries on their own platforms, and allow a BYT binary to run on IVB. v3: fix 'BYT' being overwritten by 'IVB'. Signed-off-by: LuoXionghu --- backend/src/backend/gen_program.cpp | 56 +++-- backend/src/gbe_bin_generater.cpp | 23 +++ 2 files changed, 70 insertions(+), 9 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 84e8c2a..c846786 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -196,15 +196,36 @@ namespace gbe { #endif } +#define BINARY_HEADER_LENGTH 8 +#define IS_GEN_BINARY(binary) (*binary == '\0' && *(binary+1) == 'G'&& *(binary+2) == 'E' &&*(binary+3) == 'N' &&*(binary+4) == 'C') +#define FILL_GEN_BINARY(binary) do{*binary = '\0'; *(binary+1) = 'G'; *(binary+2) = 'E'; *(binary+3) = 'N'; *(binary+4) = 'C';}while(0) +#define FILL_DEVICE_ID(binary, src_hw_info) do {*(binary+5) = src_hw_info[0]; *(binary+6) = src_hw_info[1]; *(binary+7) = src_hw_info[2];}while(0) +#define DEVICE_MATCH(typeA, src_hw_info) ((IS_IVYBRIDGE(typeA) && !strcmp(src_hw_info, "IVB")) || \ + (IS_IVYBRIDGE(typeA) && !strcmp(src_hw_info, "BYT")) || \ + (IS_BAYTRAIL_T(typeA) && !strcmp(src_hw_info, "BYT")) || \ + (IS_HASWELL(typeA) && !strcmp(src_hw_info, "HSW")) ) + static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) { using namespace gbe; std::string binary_content; -//the first 5 bytes are header to differentiate from llvm bitcode binary. -binary_content.assign(binary+5, size-5); +//the header length is 8 bytes: 1 byte is binary type, 4 bytes are bitcode header, 3 bytes are hw info. 
+char src_hw_info[4]=""; +src_hw_info[0] = *(binary+5); +src_hw_info[1] = *(binary+6); +src_hw_info[2] = *(binary+7); + +// check whether is gen binary ('/0GENC') +if(!IS_GEN_BINARY(binary)){ +return NULL; +} +// check the whether the current device ID match the binary file's. +if(!DEVICE_MATCH(deviceID, src_hw_info)){ + return NULL; +} + +binary_content.assign(binary+BINARY_HEADER_LENGTH, size-BINARY_HEADER_LENGTH); GenProgram *program = GBE_NEW(GenProgram, deviceID); std::istringstream ifs(binary_content, std::ostringstream::binary); -// FIXME we need to check the whether the current device ID match the binary file's. -deviceID = deviceID; if (!program->deserializeFromBin(ifs)) { delete program; @@ -255,11 +276,28 @@ namespace gbe { } //add header to differetiate from llvm bitcode binary. - //the header length is 5 bytes: 1 binary type, 4 bitcode header. - *binary = (char *)malloc(sizeof(char) * (sz+5) ); - memset(*binary, 0, sizeof(char) * (sz+5) ); - memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); - return sz+5; + //the header length is 8 bytes: 1 byte is binary type, 4 bytes are bitcode header, 3 bytes are hw info. 
+ *binary = (char *)malloc(sizeof(char) * (sz+BINARY_HEADER_LENGTH) ); + memset(*binary, 0, sizeof(char) * (sz+BINARY_HEADER_LENGTH) ); + FILL_GEN_BINARY(*binary); + char src_hw_info[4]=""; + if(IS_IVYBRIDGE(prog->deviceID)){ +src_hw_info[0]='I'; +src_hw_info[1]='V'; +src_hw_info[2]='B'; +if(IS_BAYTRAIL_T(prog->deviceID)){ + src_hw_info[0]='B'; + src_hw_info[1]='Y'; + src_hw_info[2]='T'; +} + }else if(IS_HASWELL(prog->deviceID)){ +src_hw_info[0]='H'; +src_hw_info[1]='S'; +src_hw_info[2]='W'; + } + FILL_DEVICE_ID(*binary, src_hw_info); + memcpy(*binary+BINARY_HEADER_LENGTH, oss.str().c_str(), sz*sizeof(char)); + return sz+BINARY_HEADER_LENGTH; }else{ #ifdef GBE_COMPILER_AVAILABLE std::string str; diff --git a/backend/src/gbe_bin_generater.cpp b/backend/src/gbe_bin_generater.cpp index d9ae946..86c4406 100644 --- a/backend/src/gbe_bin_generater.cpp +++ b/backend/src/gbe_bin_generater.cpp @@ -39,6 +39,7 @@ #include "backend/program.h" #include "backend/program.hpp" #include "backend/src/sys/platform.hpp" +#include "src/cl_device_data.h" using namespace std; @@ -159,6 +160,22 @@ void program_build_instance::serialize_program(void) throw(int) size_t sz = 0, header_sz = 0; ofs.open(bin_path, ofstream::out | ofstream::trunc | ofstream::binary); +char src_hw_info[4]=""; +if(IS_IVYBRIDGE(gen_pci_id)){ + src_hw_info[0]='I'; + src_hw_info[1]='V'; + src_hw_info[2]='B'; + if(IS_BAYTRAIL_T(gen_pci_id)){ +src_hw_info[0]='B'; +src_hw_info[1]='Y'; +src_hw_info[2]='T'; + } +
[Beignet] [Patch V3 2/2] add utest load_program_from_gen_bin.
From: LuoXionghu This test case checks whether genProgramSerializeToBinary in the backend can generate a gen binary correctly. Rename load_program_from_bin to load_program_from_bin_file; the difference is that load_program_from_bin_file can load a program either from an llvm binary or from a gen binary file generated by gbe_bin_generater. Signed-off-by: LuoXionghu --- utests/CMakeLists.txt | 3 +- utests/load_program_from_bin.cpp | 77 - utests/load_program_from_bin_file.cpp | 77 + utests/load_program_from_gen_bin.cpp | 93 +++ 4 files changed, 172 insertions(+), 78 deletions(-) delete mode 100644 utests/load_program_from_bin.cpp create mode 100644 utests/load_program_from_bin_file.cpp create mode 100644 utests/load_program_from_gen_bin.cpp diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 3614c57..561744d 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -171,7 +171,8 @@ set (utests_sources compiler_simd_any.cpp compiler_simd_all.cpp compiler_double_precision.cpp - load_program_from_bin.cpp + load_program_from_bin_file.cpp + load_program_from_gen_bin.cpp get_arg_info.cpp profiling_exec.cpp enqueue_copy_buf.cpp diff --git a/utests/load_program_from_bin.cpp b/utests/load_program_from_bin.cpp deleted file mode 100644 index d45c2bd..000 --- a/utests/load_program_from_bin.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "utest_helper.hpp" -#include "utest_file_map.hpp" -#include -#include - -using namespace std; - -static void cpu(int global_id, float *src, float *dst) { -dst[global_id] = ceilf(src[global_id]); -} - -static void test_load_program_from_bin(void) -{ -const size_t n = 16; -float cpu_dst[16], cpu_src[16]; -cl_int status; -cl_int binary_status; -char *ker_path = NULL; - -cl_file_map_t *fm = cl_file_map_new(); -ker_path = cl_do_kiss_path("compiler_ceil.bin", device); -OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS); - -const unsigned char *src = (const unsigned char *)cl_file_map_begin(fm); -const size_t sz = cl_file_map_size(fm); 
- -program = clCreateProgramWithBinary(ctx, 1, - &device, &sz, &src, &binary_status, &status); - -OCL_ASSERT(program && status == CL_SUCCESS); - -/* OCL requires to build the program even if it is created from a binary */ -OCL_ASSERT(clBuildProgram(program, 1, &device, NULL, NULL, NULL) == CL_SUCCESS); - -kernel = clCreateKernel(program, "compiler_ceil", &status); -OCL_ASSERT(status == CL_SUCCESS); - -OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); -OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); -OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); -OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); -globals[0] = 16; -locals[0] = 16; - -// Run random tests -for (uint32_t pass = 0; pass < 8; ++pass) { -OCL_MAP_BUFFER(0); -for (int32_t i = 0; i < (int32_t) n; ++i) -cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; -OCL_UNMAP_BUFFER(0); - -// Run the kernel on GPU -OCL_NDRANGE(1); - -// Run on CPU -for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); - -// Compare -OCL_MAP_BUFFER(1); - -#if 0 -printf(" GPU:\n"); -for (int32_t i = 0; i < (int32_t) n; ++i) -printf(" %f", ((float *)buf_data[1])[i]); -printf("\n CPU:\n"); -for (int32_t i = 0; i < (int32_t) n; ++i) -printf(" %f", cpu_dst[i]); -printf("\n"); -#endif - -for (int32_t i = 0; i < (int32_t) n; ++i) -OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]); -OCL_UNMAP_BUFFER(1); -} -} - -MAKE_UTEST_FROM_FUNCTION(test_load_program_from_bin); diff --git a/utests/load_program_from_bin_file.cpp b/utests/load_program_from_bin_file.cpp new file mode 100644 index 000..feefacc --- /dev/null +++ b/utests/load_program_from_bin_file.cpp @@ -0,0 +1,77 @@ +#include "utest_helper.hpp" +#include "utest_file_map.hpp" +#include +#include + +using namespace std; + +static void cpu(int global_id, float *src, float *dst) { +dst[global_id] = ceilf(src[global_id]); +} + +static void test_load_program_from_bin_file(void) +{ +const size_t n = 16; +float cpu_dst[16], cpu_src[16]; +cl_int status; +cl_int 
binary_status; +char *ker_path = NULL; + +cl_file_map_t *fm = cl_file_map_new(); +ker_path = cl_do_kiss_path("compiler_ceil.bin", device); +OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS); + +const unsigned char *src = (const unsigned char *)cl_file_map_begin(fm); +const size_t sz = cl_file_map_size(fm); + +program = clCreateProgramWithBinary(ctx, 1, + &device, &sz, &src, &binary_status, &status); + +OCL_ASSERT(program && status == CL_SUCCESS); + +/* OCL requi
[Beignet] [PATCH] improve the build performance of vector type built-in function.
From: LuoXionghu expand the gentypen with loop to reduce the redundant inline. Signed-off-by: LuoXionghu --- backend/src/gen_builtin_vector.py | 42 +-- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/backend/src/gen_builtin_vector.py b/backend/src/gen_builtin_vector.py index b100bbf..83e2bcb 100755 --- a/backend/src/gen_builtin_vector.py +++ b/backend/src/gen_builtin_vector.py @@ -283,9 +283,39 @@ class builtinProto(): formatStr += '{0} {1}param{2}'.format(ptype[0], pointerStr, n) formatStr += ')' -formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1])) -self.indent = len(formatStr) -for j in range(0, vtype[1]): +if self.functionName != 'select' and ptypeSeqs[0] == ptypeSeqs[self.paramCount-1]: +formatStr += '\n{ \n union{' +formatStr = self.append(formatStr, '{0} va[{1}];'.format(vtype[0], vtype[1])) +formatStr = self.append(formatStr, '{0}{1} vv{2};'.format(vtype[0], vtype[1], vtype[1])) +formatStr += '\n }uret;' +formatStr += '\n union{' +formatStr = self.append(formatStr, '{0} pa[{1}];'.format(ptype[0], ptype[1])) +formatStr = self.append(formatStr, '{0}{1} pv{2};'.format(ptype[0], ptype[1], ptype[1])) +formatStr += '\n }' +for n in range(0, self.paramCount): + formatStr += 'usrc{0}'.format(n) + if n+1 != self.paramCount: +formatStr +=', ' +formatStr += ';' + +for n in range(0, self.paramCount): + formatStr = self.append(formatStr, ' usrc{0}.pv{1} = param{2};'.format(n, ptype[1], n)) +formatStr = self.append(formatStr, ' for(int i =0; i < {0}; i++)'.format(ptype[1])) +formatStr = self.append(formatStr, 'uret.va[i] = {0}('.format(self.functionName)) + +for n in range(0, self.paramCount): + formatStr += 'usrc{0}.pa[i]'.format(n) + if n+1 != self.paramCount: +formatStr +=', ' +formatStr += ');' +formatStr = self.append(formatStr, ' return uret.vv{0};'.format(vtype[1])) +formatStr += '\n}' +formatStr = self.append(formatStr) +return formatStr +else: + formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], 
vtype[1])) + self.indent = len(formatStr) + for j in range(0, vtype[1]): if (j != 0): formatStr += ',' if (j + 1) % 2 == 0: @@ -320,10 +350,10 @@ class builtinProto(): formatStr += ')' -formatStr += '); }\n' -self.append(formatStr) + formatStr += '); }\n' + self.append(formatStr) -return formatStr + return formatStr def output(self): for line in self.outputStr: -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH v2] improve the build performance of vector type built-in function.
From: LuoXionghu Expand the gentypen functions with a loop to reduce redundant inlining for types with more than 4 components. v2: add the greater-than-4-components condition to avoid performance degradation. Signed-off-by: Luo Xionghu --- backend/src/gen_builtin_vector.py | 42 +-- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/backend/src/gen_builtin_vector.py b/backend/src/gen_builtin_vector.py index b100bbf..15dbaf4 100755 --- a/backend/src/gen_builtin_vector.py +++ b/backend/src/gen_builtin_vector.py @@ -283,9 +283,39 @@ class builtinProto(): formatStr += '{0} {1}param{2}'.format(ptype[0], pointerStr, n) formatStr += ')' -formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1])) -self.indent = len(formatStr) -for j in range(0, vtype[1]): +if self.functionName != 'select' and ptypeSeqs[0] == ptypeSeqs[self.paramCount-1] and ptype[1] > 4: +formatStr += '\n{ \n union{' +formatStr = self.append(formatStr, '{0} va[{1}];'.format(vtype[0], vtype[1])) +formatStr = self.append(formatStr, '{0}{1} vv{2};'.format(vtype[0], vtype[1], vtype[1])) +formatStr += '\n }uret;' +formatStr += '\n union{' +formatStr = self.append(formatStr, '{0} pa[{1}];'.format(ptype[0], ptype[1])) +formatStr = self.append(formatStr, '{0}{1} pv{2};'.format(ptype[0], ptype[1], ptype[1])) +formatStr += '\n }' +for n in range(0, self.paramCount): + formatStr += 'usrc{0}'.format(n) + if n+1 != self.paramCount: +formatStr +=', ' +formatStr += ';' + +for n in range(0, self.paramCount): + formatStr = self.append(formatStr, ' usrc{0}.pv{1} = param{2};'.format(n, ptype[1], n)) +formatStr = self.append(formatStr, ' for(int i =0; i < {0}; i++)'.format(ptype[1])) +formatStr = self.append(formatStr, 'uret.va[i] = {0}('.format(self.functionName)) + +for n in range(0, self.paramCount): + formatStr += 'usrc{0}.pa[i]'.format(n) + if n+1 != self.paramCount: +formatStr +=', ' +formatStr += ');' +formatStr = self.append(formatStr, ' return uret.vv{0};'.format(vtype[1])) +formatStr += '\n}' 
+formatStr = self.append(formatStr) +return formatStr +else: + formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1])) + self.indent = len(formatStr) + for j in range(0, vtype[1]): if (j != 0): formatStr += ',' if (j + 1) % 2 == 0: @@ -320,10 +350,10 @@ class builtinProto(): formatStr += ')' -formatStr += '); }\n' -self.append(formatStr) + formatStr += '); }\n' + self.append(formatStr) -return formatStr + return formatStr def output(self): for line in self.outputStr: -- 1.8.1.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] fix the relational built-in vector function regression.
From: Luo Xionghu The relational vector functions need to return -1 instead of 1 for true, according to the spec. Signed-off-by: Luo Xionghu --- backend/src/gen_builtin_vector.py | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/src/gen_builtin_vector.py b/backend/src/gen_builtin_vector.py index 15dbaf4..5f1c4b7 100755 --- a/backend/src/gen_builtin_vector.py +++ b/backend/src/gen_builtin_vector.py @@ -301,7 +301,10 @@ class builtinProto(): for n in range(0, self.paramCount): formatStr = self.append(formatStr, ' usrc{0}.pv{1} = param{2};'.format(n, ptype[1], n)) formatStr = self.append(formatStr, ' for(int i =0; i < {0}; i++)'.format(ptype[1])) -formatStr = self.append(formatStr, 'uret.va[i] = {0}('.format(self.functionName)) +formatStr += '\nuret.va[i] = ' +if self.prefix == 'relational' and self.functionName != 'bitselect' and self.functionName != 'select': + formatStr += '-' +formatStr += '{0}('.format(self.functionName) for n in range(0, self.paramCount): formatStr += 'usrc{0}.pa[i]'.format(n) -- 1.8.3.2 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] fix opencv_test_imgproc subcase OCL_ImgProc/Accumulate.Mask regression.
From: Luo Xionghu This regression is caused by the structural analysis when checking the if-then node; actually there are four types of if-then node according to the topology and fallthrough information. A fallthrough check is added in this patch. Signed-off-by: Luo Xionghu --- backend/src/backend/gen_insn_selection.cpp |4 +++- backend/src/ir/function.hpp|5 + backend/src/ir/structural_analysis.cpp |9 - backend/src/ir/structural_analysis.hpp | 16 +++- 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index b7a39af..9a552b1 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -4018,7 +4018,9 @@ namespace gbe sel.curr.physicalFlag = 0; sel.curr.flagIndex = (uint64_t)pred; sel.curr.externFlag = 1; - sel.curr.inversePredicate = 1; + if(insn.getParent()->need_reverse ){ +sel.curr.inversePredicate = 1; + } sel.curr.predicate = GEN_PREDICATE_NORMAL; sel.IF(GenRegister::immd(0), jip, uip); sel.curr.inversePredicate = 0; diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index c5582b4..b877bce 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -87,6 +87,11 @@ namespace ir { set definedPhiRegs; /* these three are used by structure transforming */ public: +/*if need_reverse is true, need to reverse prediction. + *if condition is TRUE, IF instruction will execute the following block, + * different from BRA instruction, so all the IF instruction need_reverse + * except two special case(fallthrough is the same with succs.). 
*/ +bool need_reverse; /* if needEndif is true, it means that this bb is the exit of an * outermost structure, so this block needs another endif to match * the if inserted at the entry of this structure, otherwise this diff --git a/backend/src/ir/structural_analysis.cpp b/backend/src/ir/structural_analysis.cpp index dfc2118..c106fa7 100644 --- a/backend/src/ir/structural_analysis.cpp +++ b/backend/src/ir/structural_analysis.cpp @@ -120,6 +120,7 @@ namespace analysis /* since this node is an if node, so we remove the BRA instruction at the bottom of the exit BB of 'node', * and insert IF instead */ +pbb->need_reverse = node->need_reverse; pbb->erase(it); ir::Instruction insn = ir::IF(matchingElseLabel, reg); ir::Instruction* p_new_insn = pbb->getParent().newInstruction(insn); @@ -724,7 +725,7 @@ namespace analysis n = *(++(node->succs().begin())); /* check for if node then n */ - if(n->succs().size() == 1 && + if( n->succs().size() == 1 && n->preds().size() == 1 && *(n->succs().begin()) == m && !n->hasBarrier() && !node->hasBarrier()) @@ -734,6 +735,9 @@ namespace analysis nset.insert(n); Node* p = new IfThenNode(node, n); +if(node->fallthrough() == m){ + node->need_reverse = false; +} if(node->canBeHandled == false || n->canBeHandled == false) p->canBeHandled = false; @@ -752,6 +756,9 @@ namespace analysis nset.insert(m); Node* p = new IfThenNode(node, m); +if(node->fallthrough() == n){ + node->need_reverse = false; +} if(node->canBeHandled == false || m->canBeHandled == false) p->canBeHandled = false; diff --git a/backend/src/ir/structural_analysis.hpp b/backend/src/ir/structural_analysis.hpp index 06c2f5f..f7a34d1 100644 --- a/backend/src/ir/structural_analysis.hpp +++ b/backend/src/ir/structural_analysis.hpp @@ -87,7 +87,7 @@ namespace analysis class Node { public: -Node(RegionType rtype, const NodeList& children): has_barrier(false), mark(false), canBeHandled(true) +Node(RegionType rtype, const NodeList& children): has_barrier(false), mark(false), 
canBeHandled(true), need_reverse(true) { this->rtype = rtype; this->children = children; @@ -118,6 +118,20 @@ namespace analysis bool canBeHandled; //label is for debug int label; +/* need_reverse should be false under two circumstance, + * fallthrough is the same with succs: + * (1) n->succs == m && node->fallthrough == m + * node + * | \ + * | \ + * m<--n + * (2) m->succs == n && node->fallthrough == n + * node + * | \ + * | \ + * m-->n + * */ +bool need_reverse; }; /* represents basic block */ -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH v2] fix opencv_test_imgproc subcase OCL_ImgProc/Accumulate.Mask regression.
From: Luo Xionghu This regression is caused by the structural analysis when checking the if-then node; actually there are four types of if-then node according to the topology and fallthrough information. A fallthrough check is added in this patch. v2: add inversePredicate member and function for BranchInstruction; print the exact meaning of the IF instruction in GEN_IR. Signed-off-by: Luo Xionghu --- backend/src/backend/gen_insn_selection.cpp |2 +- backend/src/ir/instruction.cpp | 12 +--- backend/src/ir/instruction.hpp |4 +++- backend/src/ir/structural_analysis.cpp | 10 -- backend/src/ir/structural_analysis.hpp | 16 +++- 5 files changed, 36 insertions(+), 8 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index b7a39af..170a9d8 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -4018,7 +4018,7 @@ namespace gbe sel.curr.physicalFlag = 0; sel.curr.flagIndex = (uint64_t)pred; sel.curr.externFlag = 1; - sel.curr.inversePredicate = 1; + sel.curr.inversePredicate = insn.getInversePredicated(); sel.curr.predicate = GEN_PREDICATE_NORMAL; sel.IF(GenRegister::immd(0), jip, uip); sel.curr.inversePredicate = 0; diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index bfb2000..370fb87 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -348,13 +348,14 @@ namespace ir { public NDstPolicy { public: - INLINE BranchInstruction(Opcode op, LabelIndex labelIndex, Register predicate) { + INLINE BranchInstruction(Opcode op, LabelIndex labelIndex, Register predicate, bool inv_pred=false) { GBE_ASSERT(op == OP_BRA || op == OP_IF); this->opcode = op; this->predicate = predicate; this->labelIndex = labelIndex; this->hasPredicate = true; this->hasLabel = true; +this->inversePredicate = inv_pred; } INLINE BranchInstruction(Opcode op, LabelIndex labelIndex) { GBE_ASSERT(op == OP_BRA || op == OP_ELSE || op == OP_ENDIF); @@ 
-385,11 +386,13 @@ namespace ir { predicate = reg; } INLINE bool isPredicated(void) const { return hasPredicate; } + INLINE bool getInversePredicated(void) const { return inversePredicate; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const; Register predicate;//!< Predication means conditional branch LabelIndex labelIndex; //!< Index of the label the branch targets bool hasPredicate:1; //!< Is it predicated? + bool inversePredicate:1; //!< Is it inverse predicated? bool hasLabel:1; //!< Is there any target label? Register dst[0]; //!< No destination }; @@ -1142,6 +1145,8 @@ namespace ir { INLINE void BranchInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); + if(opcode == OP_IF && inversePredicate) +out << " !"; if (hasPredicate) out << "<%" << this->getSrc(fn, 0) << ">"; if (hasLabel) out << " -> label$" << labelIndex; @@ -1463,6 +1468,7 @@ DECL_MEM_FN(LoadInstruction, bool, isAligned(void), isAligned()) DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType()) DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated()) +DECL_MEM_FN(BranchInstruction, bool, getInversePredicated(void), getInversePredicated()) DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters()) DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType()) @@ -1615,8 +1621,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex } // IF - Instruction IF(LabelIndex labelIndex, Register pred) { -return internal::BranchInstruction(OP_IF, labelIndex, pred).convert(); + Instruction IF(LabelIndex labelIndex, Register pred, bool inv_pred) { +return internal::BranchInstruction(OP_IF, labelIndex, pred, inv_pred).convert(); } // ELSE diff --git 
a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index e245638..39fb2db 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -430,6 +430,8 @@ namespace ir { public: /*! Indicate if the branch is predicated */ bool isPredicated(void) const; +/*! Indicate if the branch is inverse predicated */ +bool getInversePredicated(void) const; /*! Return the predicate register (if predicated) */ RegisterData getPredicate(void) const { GBE_ASSERTM(this->isPredicated(
[Beignet] [PATCH] fix piglit cl-api-get-program-info fail.
From: Luo Xionghu add pointer check. Signed-off-by: Luo Xionghu --- src/cl_program.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cl_program.c b/src/cl_program.c index c141a35..a745c00 100644 --- a/src/cl_program.c +++ b/src/cl_program.c @@ -743,7 +743,7 @@ cl_program_get_kernel_names(cl_program p, size_t size, char *names, size_t *size int i = 0; const char *ker_name = NULL; size_t len = 0; - *size_ret = 0; + if(size_ret) *size_ret = 0; if(p->ker == NULL) { return; -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] fix piglit clGetKernelWorkGroupInfo fail.
From: Luo Xionghu add CL_KERNEL_GLOBAL_WORK_SIZE option for clGetKernelWorkGroupInfo. Signed-off-by: Luo Xionghu --- src/cl_api.c |3 +++ src/cl_device_id.c |1 + src/cl_kernel.h|2 ++ 3 files changed, 6 insertions(+) diff --git a/src/cl_api.c b/src/cl_api.c index 2370dc0..07e8954 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2965,6 +2965,9 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, } } + for (i = 0; i < work_dim; ++i) +kernel->global_work_sz[i] = fixed_global_sz[i]; + /* Do device specific checks are enqueue the kernel */ err = cl_command_queue_ND_range(command_queue, kernel, diff --git a/src/cl_device_id.c b/src/cl_device_id.c index a0f0c99..6bd80a6 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -573,6 +573,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, } DECL_FIELD(COMPILE_WORK_GROUP_SIZE, kernel->compile_wg_sz) DECL_FIELD(PRIVATE_MEM_SIZE, kernel->stack_size) +DECL_FIELD(GLOBAL_WORK_SIZE, kernel->global_work_sz) default: return CL_INVALID_VALUE; }; diff --git a/src/cl_kernel.h b/src/cl_kernel.h index f4ed8d3..85a997d 100644 --- a/src/cl_kernel.h +++ b/src/cl_kernel.h @@ -59,6 +59,8 @@ struct _cl_kernel { cl_ulong local_mem_sz; /* local memory size specified in kernel args. */ size_t compile_wg_sz[3];/* Required workgroup size by __attribute__((reqd_work_gro up_size(X, Y, Z))) qualifier.*/ + size_t global_work_sz[3];/* maximum global size that can be used to execute a kernel +(i.e. global_work_size argument to clEnqueueNDRangeKernel.)*/ size_t stack_size; /* stack size per work item. */ cl_argument *args; /* To track argument setting */ uint32_t arg_n:31; /* Number of arguments */ -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] fix clGetKernelWorkGroupInfo built-in kernel fail.
From: Luo Xionghu add CL_KERNEL_GLOBAL_WORK_SIZE option for clGetKernelWorkGroupInfo. v2: should return the max global work size instead of current work size. This funtion need return CL_INVALID_VALUE if the device is not a custom device or kernel is not a built-in kernel. we have 3 kind of built-in kernels for 1d/2d/3d memories, the max global work size are decided by the dimension and memory type. the piglit fail is caused by calling NON built-in kernels, so need send patch to piglit later. Signed-off-by: Luo Xionghu --- src/cl_device_id.c| 34 + src/cl_device_id.h|3 +++ src/cl_gt_device.h|3 +++ src/cl_kernel.h |2 ++ utests/CMakeLists.txt |1 + utests/builtin_kernel_max_global_size.cpp | 30 + 6 files changed, 73 insertions(+) create mode 100644 utests/builtin_kernel_max_global_size.cpp diff --git a/src/cl_device_id.c b/src/cl_device_id.c index a0f0c99..5b24fcb 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -515,6 +515,22 @@ cl_device_get_version(cl_device_id device, cl_int *ver) #include "cl_kernel.h" #include "cl_program.h" +LOCAL int +cl_check_builtin_kernel_dimension(cl_kernel kernel, cl_device_id device) +{ + const char * n = cl_kernel_get_name(kernel); + const char * builtin_kernels_2d = "__cl_copy_image_2d_to_2d;__cl_copy_image_2d_to_buffer;__cl_copy_buffer_to_image_2d;__cl_fill_image_2d;__cl_fill_image_2d_array;"; + const char * builtin_kernels_3d = "__cl_copy_image_3d_to_2d;__cl_copy_image_2d_to_3d;__cl_copy_image_3d_to_3d;__cl_copy_image_3d_to_buffer;__cl_copy_buffer_to_image_3d;__cl_fill_image_3d"; +if (!strstr(device->built_in_kernels, n)){ + return 0; +}else if(strstr(builtin_kernels_2d, n)){ + return 2; +}else if(strstr(builtin_kernels_3d, n)){ + return 3; +}else + return 1; + +} LOCAL size_t cl_get_kernel_max_wg_sz(cl_kernel kernel) @@ -543,6 +559,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, size_t* param_value_size_ret) { int err = CL_SUCCESS; + int dimension = 0; if (UNLIKELY(device != &intel_ivb_gt1_device && device != 
&intel_ivb_gt2_device && device != &intel_baytrail_t_device && @@ -573,6 +590,23 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, } DECL_FIELD(COMPILE_WORK_GROUP_SIZE, kernel->compile_wg_sz) DECL_FIELD(PRIVATE_MEM_SIZE, kernel->stack_size) +case CL_KERNEL_GLOBAL_WORK_SIZE: + dimension = cl_check_builtin_kernel_dimension(kernel, device); + if ( !dimension ) return CL_INVALID_VALUE; + if (param_value_size_ret != NULL) +*param_value_size_ret = sizeof(device->max_1d_global_work_sizes); + if (param_value) { +if (dimension == 1) { + memcpy(param_value, device->max_1d_global_work_sizes, sizeof(device->max_1d_global_work_sizes)); +}else if(dimension == 2){ + memcpy(param_value, device->max_2d_global_work_sizes, sizeof(device->max_2d_global_work_sizes)); +}else if(dimension == 3){ + memcpy(param_value, device->max_3d_global_work_sizes, sizeof(device->max_3d_global_work_sizes)); +}else + return CL_INVALID_VALUE; + +return CL_SUCCESS; + } default: return CL_INVALID_VALUE; }; diff --git a/src/cl_device_id.h b/src/cl_device_id.h index c4f8227..31bce47 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -30,6 +30,9 @@ struct _cl_device_id { cl_uint max_work_item_dimensions; // should be 3. size_t max_work_item_sizes[3]; // equal to maximum work group size. size_t max_work_group_size;// maximum work group size under simd16 mode. + size_t max_1d_global_work_sizes[3]; // maximum 1d global work size for builtin kernels. + size_t max_2d_global_work_sizes[3]; // maximum 2d global work size for builtin kernels. + size_t max_3d_global_work_sizes[3]; // maximum 3d global work size for builtin kernels. 
cl_uint preferred_vector_width_char; cl_uint preferred_vector_width_short; cl_uint preferred_vector_width_int; diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index 33ef1f0..3cd54eb 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -21,6 +21,9 @@ .device_type = CL_DEVICE_TYPE_GPU, .vendor_id = 0, /* == device_id (set when requested) */ .max_work_item_dimensions = 3, +.max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1}, +.max_2d_global_work_sizes = {8192, 8192, 1}, +.max_3d_global_work_sizes = {8192, 8192, 2048}, .preferred_vector_width_char = 8, .preferred_vector_width_short = 8, .preferred_vector_width_int = 4, diff --git a/src/cl_kernel.h b/src/cl_kernel.h index f4ed8d3..85a997d 100644 --- a/src/cl_kernel.h +++ b/src/cl_kernel.h @@ -59,6 +59,8 @@ struct _cl_kernel { cl_ulong local_mem_sz; /* local
[Beignet] [PATCH] fix piglit cl-api-set-kernel-arg fail.
From: Luo Xionghu the memory object should be checked whether valid in context buffers before being set as kernel arguments. Signed-off-by: Luo Xionghu --- src/cl_kernel.c |4 src/cl_mem.c| 12 src/cl_mem.h|3 +++ 3 files changed, 19 insertions(+) diff --git a/src/cl_kernel.c b/src/cl_kernel.c index 5ab9c55..8eec907 100644 --- a/src/cl_kernel.c +++ b/src/cl_kernel.c @@ -99,6 +99,7 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value) enum gbe_arg_type arg_type; /* kind of argument */ size_t arg_sz; /* size of the argument */ cl_mem mem = NULL; /* for __global, __constant and image arguments */ + cl_context ctx = k->program->ctx; if (UNLIKELY(index >= k->arg_n)) return CL_INVALID_ARG_INDEX; @@ -136,6 +137,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value) if(value != NULL) mem = *(cl_mem*)value; if(value != NULL && mem) { + if (!mem_in_buffers(mem, ctx->buffers)) +return CL_INVALID_ARG_VALUE; + if (UNLIKELY(mem->magic != CL_MAGIC_MEM_HEADER)) return CL_INVALID_MEM_OBJECT; diff --git a/src/cl_mem.c b/src/cl_mem.c index 11411d9..d4bbe2c 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -289,6 +289,18 @@ error: } +LOCAL cl_bool +mem_in_buffers(cl_mem mem, cl_mem buffers) +{ + cl_mem tmp = buffers; + while(tmp){ +if(mem == tmp) + return CL_TRUE; +tmp = tmp->next; + } + return CL_FALSE; +} + LOCAL cl_mem cl_mem_new_buffer(cl_context ctx, cl_mem_flags flags, diff --git a/src/cl_mem.h b/src/cl_mem.h index 57f38f1..3bcad18 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -177,6 +177,9 @@ extern cl_int cl_get_mem_object_info(cl_mem, cl_mem_info, size_t, void *, size_t /* Query information about an image */ extern cl_int cl_get_image_info(cl_mem, cl_image_info, size_t, void *, size_t *); +/* Query whether mem is in buffers */ +extern cl_bool mem_in_buffers(cl_mem mem, cl_mem buffers); + /* Create a new memory object and initialize it with possible user data */ extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, 
void*, cl_int*); -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] fix piglit cl-api-set-kernel-arg fail.
From: Luo Xionghu the memory object should be checked whether valid in context buffers before being set as kernel arguments. v2: rename the function from mem_in_buffers to is_valid_mem, move the magic header check into it. Signed-off-by: Luo Xionghu --- src/cl_kernel.c |3 ++- src/cl_mem.c| 15 +++ src/cl_mem.h|3 +++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/cl_kernel.c b/src/cl_kernel.c index 5ab9c55..d7c2f7c 100644 --- a/src/cl_kernel.c +++ b/src/cl_kernel.c @@ -99,6 +99,7 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value) enum gbe_arg_type arg_type; /* kind of argument */ size_t arg_sz; /* size of the argument */ cl_mem mem = NULL; /* for __global, __constant and image arguments */ + cl_context ctx = k->program->ctx; if (UNLIKELY(index >= k->arg_n)) return CL_INVALID_ARG_INDEX; @@ -136,7 +137,7 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value) if(value != NULL) mem = *(cl_mem*)value; if(value != NULL && mem) { - if (UNLIKELY(mem->magic != CL_MAGIC_MEM_HEADER)) + if( CL_SUCCESS != is_valid_mem(mem, ctx->buffers)) return CL_INVALID_MEM_OBJECT; if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !IS_IMAGE(mem)) diff --git a/src/cl_mem.c b/src/cl_mem.c index 11411d9..077f1d7 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -289,6 +289,21 @@ error: } +LOCAL cl_int +is_valid_mem(cl_mem mem, cl_mem buffers) +{ + cl_mem tmp = buffers; + while(tmp){ +if(mem == tmp){ + if (UNLIKELY(mem->magic != CL_MAGIC_MEM_HEADER)) +return CL_INVALID_MEM_OBJECT; + return CL_SUCCESS; +} +tmp = tmp->next; + } + return CL_INVALID_MEM_OBJECT; +} + LOCAL cl_mem cl_mem_new_buffer(cl_context ctx, cl_mem_flags flags, diff --git a/src/cl_mem.h b/src/cl_mem.h index 57f38f1..0ccbb5d 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -177,6 +177,9 @@ extern cl_int cl_get_mem_object_info(cl_mem, cl_mem_info, size_t, void *, size_t /* Query information about an image */ extern cl_int cl_get_image_info(cl_mem, cl_image_info, size_t, 
void *, size_t *); +/* Query whether mem is in buffers */ +extern cl_int is_valid_mem(cl_mem mem, cl_mem buffers); + /* Create a new memory object and initialize it with possible user data */ extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, cl_int*); -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] fix piglit get kernel info FUNCTION ATTRIBUTE fail.
From: Luo the backend need return the kernel FUNCTION ATTRIBUTE message to the clGetKernelInfo. there are 3 kind of function attribute so far, vec_type_hint parameter is not available to return due to llvm lack of such info. Signed-off-by: Luo --- backend/src/backend/program.cpp |9 +++ backend/src/backend/program.h |4 +++ backend/src/backend/program.hpp |6 + backend/src/gbe_bin_interpreter.cpp |1 + backend/src/ir/function.hpp |5 backend/src/llvm/llvm_gen_backend.cpp | 45 +++ src/cl_api.c|3 +++ src/cl_gbe_loader.cpp |5 src/cl_gbe_loader.h |1 + src/cl_kernel.c |7 + src/cl_kernel.h |3 +++ utests/CMakeLists.txt |1 + utests/compiler_function_qualifiers.cpp | 10 +++ 13 files changed, 100 insertions(+) diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index be83108..2308770 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -154,6 +154,7 @@ namespace gbe { kernel->setImageSet(pair.second->getImageSet()); kernel->setPrintfSet(pair.second->getPrintfSet()); kernel->setCompileWorkGroupSize(pair.second->getCompileWorkGroupSize()); + kernel->setFunctionAttributes(pair.second->getFunctionAttributes()); kernels.insert(std::make_pair(name, kernel)); } return true; @@ -895,6 +896,12 @@ namespace gbe { return kernel->getName(); } + static const char *kernelGetAttributes(gbe_kernel genKernel) { +if (genKernel == NULL) return NULL; +const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; +return kernel->getFunctionAttributes(); + } + static const char *kernelGetCode(gbe_kernel genKernel) { if (genKernel == NULL) return NULL; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; @@ -,6 +1118,7 @@ GBE_EXPORT_SYMBOL gbe_program_get_kernel_num_cb *gbe_program_get_kernel_num = NU GBE_EXPORT_SYMBOL gbe_program_get_kernel_by_name_cb *gbe_program_get_kernel_by_name = NULL; GBE_EXPORT_SYMBOL gbe_program_get_kernel_cb *gbe_program_get_kernel = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_name_cb *gbe_kernel_get_name = NULL; 
+GBE_EXPORT_SYMBOL gbe_kernel_get_attributes_cb *gbe_kernel_get_attributes = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_code_cb *gbe_kernel_get_code = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_code_size_cb *gbe_kernel_get_code_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_arg_num_cb *gbe_kernel_get_arg_num = NULL; @@ -1158,6 +1166,7 @@ namespace gbe gbe_program_get_kernel_by_name = gbe::programGetKernelByName; gbe_program_get_kernel = gbe::programGetKernel; gbe_kernel_get_name = gbe::kernelGetName; + gbe_kernel_get_attributes = gbe::kernelGetAttributes; gbe_kernel_get_code = gbe::kernelGetCode; gbe_kernel_get_code_size = gbe::kernelGetCodeSize; gbe_kernel_get_arg_num = gbe::kernelGetArgNum; diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index c63ae6a..0e773f4 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -271,6 +271,10 @@ extern gbe_program_get_kernel_cb *gbe_program_get_kernel; typedef const char *(gbe_kernel_get_name_cb)(gbe_kernel); extern gbe_kernel_get_name_cb *gbe_kernel_get_name; +/*! Get the kernel attributes*/ +typedef const char *(gbe_kernel_get_attributes_cb)(gbe_kernel); +extern gbe_kernel_get_attributes_cb *gbe_kernel_get_attributes; + /*! Get the kernel source code */ typedef const char *(gbe_kernel_get_code_cb)(gbe_kernel); extern gbe_kernel_get_code_cb *gbe_kernel_get_code; diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp index 6a8af61..4f9b68a 100644 --- a/backend/src/backend/program.hpp +++ b/backend/src/backend/program.hpp @@ -176,6 +176,11 @@ namespace gbe { wg_sz[1] = compileWgSize[1]; wg_sz[2] = compileWgSize[2]; } +/*! Set function attributes string. */ +void setFunctionAttributes(const std::string& functionAttributes) { this->functionAttributes= functionAttributes; } +/*! Get function attributes string. */ +const char* getFunctionAttributes(void) const {return this->functionAttributes.c_str();} + /*! 
Get defined image size */ size_t getImageSize(void) const { return (imageSet == NULL ? 0 : imageSet->getDataSize()); } /*! Get defined image value array */ @@ -228,6 +233,7 @@ namespace gbe { ir::ImageSet *imageSet;//!< Copy from the corresponding function. ir::PrintfSet *printfSet; //!< Copy from the corresponding function. size_t compileWgSize[3]; //!< required work group size by kernel attribute. +std::string functionAttributes; //!< function attribute qualifiers combined. GBE_CLASS(Kernel); //!< Use custom all
[Beignet] [PATCH] fix bin/cl-program-tester tests/cl/program/execute/attributes.cl regression.
From: Luo Xionghu work_group_size_hint should define another variable. Signed-off-by: Luo Xionghu --- backend/src/llvm/llvm_gen_backend.cpp | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index b0e02ca..918af24 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1249,6 +1249,7 @@ namespace gbe // Loop over the kernel metadatas to set the required work group size. NamedMDNode *clKernelMetaDatas = TheModule->getNamedMetadata("opencl.kernels"); size_t reqd_wg_sz[3] = {0, 0, 0}; +size_t hint_wg_sz[3] = {0, 0, 0}; ir::FunctionArgument::InfoFromLLVM llvmInfo; MDNode *node = NULL; MDNode *addrSpaceNode = NULL; @@ -1320,18 +1321,18 @@ namespace gbe ConstantInt *y = dyn_cast(attrNode->getOperand(2)); ConstantInt *z = dyn_cast(attrNode->getOperand(3)); GBE_ASSERT(x && y && z); -reqd_wg_sz[0] = x->getZExtValue(); -reqd_wg_sz[1] = y->getZExtValue(); -reqd_wg_sz[2] = z->getZExtValue(); +hint_wg_sz[0] = x->getZExtValue(); +hint_wg_sz[1] = y->getZExtValue(); +hint_wg_sz[2] = z->getZExtValue(); functionAttributes += attrName->getString(); std::stringstream param; char buffer[100]; param <<"("; -param << reqd_wg_sz[0]; +param << hint_wg_sz[0]; param << ","; -param << reqd_wg_sz[1]; +param << hint_wg_sz[1]; param << ","; -param << reqd_wg_sz[2]; +param << hint_wg_sz[2]; param <<")"; param >> buffer; functionAttributes += buffer; -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 0/3] SelfLoop enable.
From: Luo Xionghu This patchset enables the self-loop with the "WHILE" instruction. A regression is introduced in the utest case builtin_remquo: the symptom is that the executed block sequence is different. There may be a bug in buildJIPs or somewhere else; calling for zhigang's help. Luo Xionghu (3): Add Gen IR WHILE. add handleSelfLoopNode to insert while instruction on Gen IR level. Use instruction WHILE to manipulate structure. backend/src/backend/gen_context.cpp | 10 +++ backend/src/backend/gen_encoder.cpp | 13 - backend/src/backend/gen_encoder.hpp |2 ++ backend/src/backend/gen_insn_scheduling.cpp |2 +- backend/src/backend/gen_insn_selection.cpp | 20 + backend/src/backend/gen_insn_selection.hxx |1 + backend/src/ir/function.hpp |3 ++ backend/src/ir/instruction.cpp |7 - backend/src/ir/instruction.hpp |2 ++ backend/src/ir/instruction.hxx |1 + backend/src/ir/structural_analysis.cpp | 42 --- backend/src/ir/structural_analysis.hpp |4 +-- 12 files changed, 91 insertions(+), 16 deletions(-) -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 1/3] Add Gen IR WHILE.
From: Luo Xionghu Add Gen IR WHILE to mark the structured region. Signed-off-by: Luo Xionghu --- backend/src/ir/instruction.cpp |7 ++- backend/src/ir/instruction.hpp |2 ++ backend/src/ir/instruction.hxx |1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 370fb87..b8beea1 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -349,7 +349,7 @@ namespace ir { { public: INLINE BranchInstruction(Opcode op, LabelIndex labelIndex, Register predicate, bool inv_pred=false) { -GBE_ASSERT(op == OP_BRA || op == OP_IF); +GBE_ASSERT(op == OP_BRA || op == OP_IF || op == OP_WHILE); this->opcode = op; this->predicate = predicate; this->labelIndex = labelIndex; @@ -1634,6 +1634,11 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex return internal::BranchInstruction(OP_ENDIF, labelIndex).convert(); } + // WHILE + Instruction WHILE(LabelIndex labelIndex, Register pred) { +return internal::BranchInstruction(OP_WHILE, labelIndex, pred).convert(); + } + // RET Instruction RET(void) { return internal::BranchInstruction(OP_RET).convert(); diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 39fb2db..afaedff 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -670,6 +670,8 @@ namespace ir { Instruction ELSE(LabelIndex labelIndex); /*! endif */ Instruction ENDIF(LabelIndex labelIndex); + /*! (pred) while labelIndex */ + Instruction WHILE(LabelIndex labelIndex, Register pred); /*! ret */ Instruction RET(void); /*! 
load.type.space {dst1,...,dst_valueNum} offset value */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index abc984f..f86d0e1 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -96,3 +96,4 @@ DECL_INSN(MAD, TernaryInstruction) DECL_INSN(IF, BranchInstruction) DECL_INSN(ENDIF, BranchInstruction) DECL_INSN(ELSE, BranchInstruction) +DECL_INSN(WHILE, BranchInstruction) -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 2/3] add handleSelfLoopNode to insert while instruction on Gen IR level.
From: Luo Xionghu Signed-off-by: Luo Xionghu --- backend/src/backend/gen_encoder.cpp|2 +- backend/src/ir/function.hpp|3 +++ backend/src/ir/structural_analysis.cpp | 40 backend/src/ir/structural_analysis.hpp |4 ++-- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 26e997d..c67e85e 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -1044,7 +1044,7 @@ namespace gbe this->setSrc1(&insn, GenRegister::immd(jumpDistance)); return; } - else if (insn.header.opcode == GEN_OPCODE_JMPI) { + else if (insn.header.opcode == GEN_OPCODE_JMPI){ jumpDistance = jumpDistance - 2; } else if(insn.header.opcode == GEN_OPCODE_ENDIF) diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index c5582b4..fc5ad45 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -142,6 +142,9 @@ namespace ir { * else node into all the basic blocks belong to 'then' part while the liveout is * calculated in structural_analysis.cpp:calculateNecessaryLiveout(); */ std::set liveout; +/* selfLoop's label. 
+ * */ +LabelIndex whileLabel; private: friend class Function; //!< Owns the basic blocks BlockSet predecessors; //!< Incoming blocks diff --git a/backend/src/ir/structural_analysis.cpp b/backend/src/ir/structural_analysis.cpp index 459a61e..1c4bf40 100644 --- a/backend/src/ir/structural_analysis.cpp +++ b/backend/src/ir/structural_analysis.cpp @@ -57,6 +57,23 @@ namespace analysis iter++; } } + void ControlTree::handleSelfLoopNode(Node *loopnode, ir::LabelIndex& whileLabel) + { +ir::BasicBlock *pbb = loopnode->getExit(); +ir::BranchInstruction* pinsn = static_cast(pbb->getLastInstruction()); +ir::Register reg = pinsn->getPredicateIndex(); +ir::BasicBlock::iterator it = pbb->end(); +it--; +/* since this node is an while node, so we remove the BRA instruction at the bottom of the exit BB of 'node', + * and insert WHILE instead + */ +pbb->erase(it); +whileLabel = pinsn->getLabelIndex(); +ir::Instruction insn = ir::WHILE(whileLabel, reg); +ir::Instruction* p_new_insn = pbb->getParent().newInstruction(insn); +pbb->append(*p_new_insn); +pbb->whileLabel = whileLabel; + } /* recursive mark the bbs' variable needEndif, the bbs all belong to node.*/ void ControlTree::markNeedIf(Node *node, bool status) @@ -207,7 +224,7 @@ namespace analysis * structures */ while(rit != nodes.rend()) { - if((*rit)->type() == IfThen || (*rit)->type() == IfElse) + if((*rit)->type() == IfThen || (*rit)->type() == IfElse|| (*rit)->type() == SelfLoop) { if(false == (*rit)->mark && (*rit)->canBeHandled) { @@ -229,7 +246,7 @@ namespace analysis } else if((*rit)->type() == SelfLoop || (*rit)->type() == WhileLoop) { -printf("process loop\n"); + } rit++; } @@ -260,12 +277,12 @@ namespace analysis */ while(rit != nodes.rend()) { - if(((*rit)->type() == IfThen || (*rit)->type() == IfElse || (*rit)->type() == Block) && + if(((*rit)->type() == IfThen || (*rit)->type() == IfElse || (*rit)->type() == Block ||(*rit)->type() == SelfLoop) && (*rit)->canBeHandled && (*rit)->mark == true) { 
markStructuredNodes(*rit, false); std::set ns = getStructureBasicBlocksIndex(*rit, bbs); -ir::BasicBlock *entry = (*it)->getEntry(); +ir::BasicBlock *entry = (*rit)->getEntry(); int entryIndex = *(ns.begin()); for(size_t i=0; ichildren.begin(); + ir::LabelIndex whilelabel; + handleSelfLoopNode(*child_iter, whilelabel); +} +break; + default: break; } @@ -841,7 +866,6 @@ namespace analysis * ignore the identification of cyclic regions. */ Node * ControlTree::cyclicRegionType(Node *node, NodeList &nset) { -#if 0 /* check for self-loop */ if(nset.size() == 1) { @@ -874,7 +898,6 @@ namespace analysis if(node->succs().size() == 2 && (*m)->succs().size() == 1 && node->preds().size() == 2 && (*m)->preds().size() == 1) { -printf("WhileLoop!\n\n"); Node* p = new WhileLoopNode(node, *m); p->canBeHandled = false; @@ -882,7 +905,6 @@ namespace analysis return insertNode(p); } } -#endif return NULL; } @@ -1008,7 +1030,6 @@ namespace analysis else { /* We now only deal with acyclic regions at this moment. */ -#if 0 reachUnder.clear(); nset.clear(); for(NodeList::const_iterator m = post_order.begin(); m != post_order.
[Beignet] [PATCH 3/3] Use instruction WHILE to manipulate structure.
From: Luo Xionghu 1. WHILE instruction should be non-schedulable. 2. if this WHILE instruction jumps to an ELSE instruction, the distance need add 2. Signed-off-by: Luo Xionghu --- backend/src/backend/gen_context.cpp | 10 ++ backend/src/backend/gen_encoder.cpp | 11 +++ backend/src/backend/gen_encoder.hpp |2 ++ backend/src/backend/gen_insn_scheduling.cpp |2 +- backend/src/backend/gen_insn_selection.cpp | 20 backend/src/backend/gen_insn_selection.hxx |1 + backend/src/ir/structural_analysis.cpp |2 +- 7 files changed, 46 insertions(+), 2 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index ba4a8f8..6cbfa43 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -254,6 +254,16 @@ namespace gbe p->ELSE(src); } break; + case SEL_OP_WHILE: +{ + /*const ir::LabelIndex label0(insn.index), label1(insn.index1); + const LabelPair labelPair(label0, label1); + const GenRegister src = ra->genReg(insn.src(0)); + this->branchPos3.push_back(std::make_pair(labelPair, p->store.size()));*/ + insertJumpPos(insn); + p->WHILE(src); +} +break; default: NOT_IMPLEMENTED; } } diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index c67e85e..295e11d 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -1026,6 +1026,7 @@ namespace gbe ALU2_BRA(IF) ALU2_BRA(ELSE) ALU2_BRA(ENDIF) + ALU2_BRA(WHILE) ALU2_BRA(BRD) ALU2_BRA(BRC) @@ -1037,8 +1038,18 @@ namespace gbe insn.header.opcode == GEN_OPCODE_ENDIF || insn.header.opcode == GEN_OPCODE_IF || insn.header.opcode == GEN_OPCODE_BRC || + insn.header.opcode == GEN_OPCODE_WHILE || insn.header.opcode == GEN_OPCODE_ELSE); +if( insn.header.opcode == GEN_OPCODE_WHILE ){ + // if this WHILE instruction jump back to an ELSE instruction, + // need add distance to go to the next instruction. 
+ GenNativeInstruction & insn_else = *(GenNativeInstruction *)&this->store[insnID+jumpDistance]; + if(insn_else.header.opcode == GEN_OPCODE_ELSE){ +jumpDistance += 2; + } +} + if (insn.header.opcode != GEN_OPCODE_JMPI || (jumpDistance > -32769 && jumpDistance < 32768)) { if (insn.header.opcode == GEN_OPCODE_IF) { this->setSrc1(&insn, GenRegister::immd(jumpDistance)); diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index 9844eb8..2c999ce 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -154,6 +154,8 @@ namespace gbe void ELSE(GenRegister src); /*! ENDIF indexed instruction */ void ENDIF(GenRegister src); +/*! WHILE indexed instruction */ +void WHILE(GenRegister src); /*! BRC indexed instruction */ void BRC(GenRegister src); /*! BRD indexed instruction */ diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp index 4324206..035a021 100644 --- a/backend/src/backend/gen_insn_scheduling.cpp +++ b/backend/src/backend/gen_insn_scheduling.cpp @@ -590,7 +590,7 @@ namespace gbe for (int32_t insnID = 0; insnID < insnNum; ++insnID) { ScheduleDAGNode *node = tracker.insnNodes[insnID]; if (node->insn.isBranch() || node->insn.isLabel() - || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF + || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF || node->insn.opcode == SEL_OP_WHILE || node->insn.opcode == SEL_OP_BARRIER) tracker.makeBarrier(insnID, insnNum); } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 170a9d8..4509072 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -543,6 +543,8 @@ namespace gbe void ELSE(Reg src, ir::LabelIndex jip, ir::LabelIndex elseLabel); /*! ENDIF indexed instruction */ void ENDIF(Reg src, ir::LabelIndex jip, ir::LabelIndex endifLabel = ir::LabelIndex(0)); +/*! 
WHILE indexed instruction */ +void WHILE(Reg src, ir::LabelIndex jip); /*! BRD indexed instruction */ void BRD(Reg src, ir::LabelIndex jip); /*! BRC indexed instruction */ @@ -1062,6 +1064,12 @@ namespace gbe insn->index = uint16_t(this->block->endifLabel); } + void Selection::Opaque::WHILE(Reg src, ir::LabelIndex jip) { +SelectionInstruction *insn = this->appendInsn(SEL_OP_WHILE, 0, 1); +insn->src(0) = src; +insn->index = uint16_t(jip); + } + void Selection::Opaque::CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst) { SelectionInst
[Beignet] [PATCH] remove the LinkOnceAnyLinkage since the libocl is introduced.
From: Luo Xionghu Now that libocl has been introduced, there is no need to set LinkOnceAnyLinkage on global variables and functions to avoid redefinition errors. Signed-off-by: Luo Xionghu --- backend/src/backend/gen_program.cpp |9 - 1 file changed, 9 deletions(-) diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 44c9c10..bd0c070 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -367,15 +367,6 @@ namespace gbe { }else{ //set the global variables and functions to link once to fix redefine. llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module; - for (llvm::Module::global_iterator I = src->global_begin(), E = src->global_end(); I != E; ++I) { -I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); - } - - for (llvm::Module::iterator I = src->begin(), E = src->end(); I != E; ++I) { -llvm::Function *F = llvm::dyn_cast(I); -if (F && isKernelFunction(*F)) continue; -I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); - } llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module; llvm::Linker::LinkModules( dst, src, -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] improve the build performance of vector type built-in function.
From: Luo Xionghu this patch was lost during the libocl merge. resubmit it to improve the vector function performance. please refer to e2db890596eea0a6eb741e11e576a38952f1ed1e for detail. Signed-off-by: Luo Xionghu --- backend/src/libocl/script/gen_vector.py | 45 ++- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/backend/src/libocl/script/gen_vector.py b/backend/src/libocl/script/gen_vector.py index a91dfcf..de28552 100755 --- a/backend/src/libocl/script/gen_vector.py +++ b/backend/src/libocl/script/gen_vector.py @@ -289,9 +289,42 @@ class builtinProto(): formatStr += ';' self.append(formatStr) return formatStr -formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1])) -self.indent = len(formatStr) -for j in range(0, vtype[1]): +if self.functionName != 'select' and ptypeSeqs[0] == ptypeSeqs[self.paramCount-1] and ptype[1] > 4: +formatStr += '\n{ \n union{' +formatStr = self.append(formatStr, '{0} va[{1}];'.format(vtype[0], vtype[1])) +formatStr = self.append(formatStr, '{0}{1} vv{2};'.format(vtype[0], vtype[1], vtype[1])) +formatStr += '\n }uret;' +formatStr += '\n union{' +formatStr = self.append(formatStr, '{0} pa[{1}];'.format(ptype[0], ptype[1])) +formatStr = self.append(formatStr, '{0}{1} pv{2};'.format(ptype[0], ptype[1], ptype[1])) +formatStr += '\n }' +for n in range(0, self.paramCount): + formatStr += 'usrc{0}'.format(n) + if n+1 != self.paramCount: +formatStr +=', ' +formatStr += ';' + +for n in range(0, self.paramCount): + formatStr = self.append(formatStr, ' usrc{0}.pv{1} = param{2};'.format(n, ptype[1], n)) +formatStr = self.append(formatStr, ' for(int i =0; i < {0}; i++)'.format(ptype[1])) +formatStr += '\nuret.va[i] = ' +if self.prefix == 'relational' and self.functionName != 'bitselect' and self.functionName != 'select': + formatStr += '-' +formatStr += '{0}('.format(self.functionName) + +for n in range(0, self.paramCount): + formatStr += 'usrc{0}.pa[i]'.format(n) + if n+1 != self.paramCount: +formatStr +=', 
' +formatStr += ');' +formatStr = self.append(formatStr, ' return uret.vv{0};'.format(vtype[1])) +formatStr += '\n}' +formatStr = self.append(formatStr) +return formatStr +else: + formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1])) + self.indent = len(formatStr) + for j in range(0, vtype[1]): if (j != 0): formatStr += ',' if (j + 1) % 2 == 0: @@ -326,10 +359,10 @@ class builtinProto(): formatStr += ')' -formatStr += '); }\n' -self.append(formatStr) + formatStr += '); }\n' + self.append(formatStr) -return formatStr + return formatStr def output(self): for line in self.outputStr: -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] fix switch bug and utest memory leak.
From: Luo Xionghu Signed-off-by: Luo Xionghu --- src/cl_device_id.c|1 + utests/builtin_kernel_max_global_size.cpp |5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cl_device_id.c b/src/cl_device_id.c index ee3f2b7..1ce5380 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -607,6 +607,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, return CL_SUCCESS; } + return CL_SUCCESS; default: return CL_INVALID_VALUE; }; diff --git a/utests/builtin_kernel_max_global_size.cpp b/utests/builtin_kernel_max_global_size.cpp index c777564..e6910cd 100644 --- a/utests/builtin_kernel_max_global_size.cpp +++ b/utests/builtin_kernel_max_global_size.cpp @@ -18,12 +18,13 @@ void builtin_kernel_max_global_size(void) OCL_ASSERT(builtin_kernel_1d != NULL); size_t param_value_size; void* param_value; - clGetKernelWorkGroupInfo(builtin_kernel_1d, device, CL_KERNEL_GLOBAL_WORK_SIZE, 0, NULL, ¶m_value_size); + OCL_CALL(clGetKernelWorkGroupInfo, builtin_kernel_1d, device, CL_KERNEL_GLOBAL_WORK_SIZE, 0, NULL, ¶m_value_size); param_value = malloc(param_value_size); - clGetKernelWorkGroupInfo(builtin_kernel_1d, device, CL_KERNEL_GLOBAL_WORK_SIZE, param_value_size, param_value, 0); + OCL_CALL(clGetKernelWorkGroupInfo, builtin_kernel_1d, device, CL_KERNEL_GLOBAL_WORK_SIZE, param_value_size, param_value, 0); OCL_ASSERT(*(size_t*)param_value == 256 * 1024 *1024); clReleaseKernel(builtin_kernel_1d); clReleaseProgram(built_in_prog); + free(built_in_kernel_names); free(param_value); } -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH V2] fix CL_KERNEL_GLOBAL_WORK_SIZE bug.
From: Luo the option CL_KERNEL_GLOBAL_WORK_SIZE for clGetKernelWorkGroupInfo should call built in kernel or custom device according to the spec, this patch calls the built in kernel to query the GLOBAL_WORK_SIZE. v2: use built in kernel to query the GLOBAL_WORK_SIZE if exist, dummy kernel for other options, handle the case when no built in kernel is provided. Signed-off-by: Luo --- tests/cl/api/get-kernel-work-group-info.c | 66 +++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/tests/cl/api/get-kernel-work-group-info.c b/tests/cl/api/get-kernel-work-group-info.c index 47d09da..11d29d2 100644 --- a/tests/cl/api/get-kernel-work-group-info.c +++ b/tests/cl/api/get-kernel-work-group-info.c @@ -61,6 +61,11 @@ piglit_cl_test(const int argc, int i; cl_int errNo; cl_kernel kernel; + cl_program built_in_prog = NULL; + cl_kernel built_in_kernel = NULL; + cl_kernel temp_kernel; + size_t built_in_kernels_size; + size_t param_value_size; void* param_value; @@ -71,19 +76,65 @@ piglit_cl_test(const int argc, PIGLIT_CL_ENUM_ARRAY(cl_kernel_work_group_info); kernel = clCreateKernel(env->program, - "dummy_kernel", - &errNo); + "dummy_kernel", + &errNo); + + errNo = clGetDeviceInfo(env->device_id, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, &built_in_kernels_size); if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { fprintf(stderr, - "Failed (error code: %s): Create kernel.\n", + "Failed (error code: %s): Get Device Info.\n", piglit_cl_get_error_name(errNo)); return PIGLIT_FAIL; } + if(built_in_kernels_size != 0) + { + char* built_in_kernel_names; + char* kernel_name; + size_t ret_sz; + built_in_kernel_names = (char* )malloc(built_in_kernels_size * sizeof(char) ); + + errNo = clGetDeviceInfo(env->device_id, CL_DEVICE_BUILT_IN_KERNELS, built_in_kernels_size, (void*)built_in_kernel_names, &ret_sz); + if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { + fprintf(stderr, + "Failed (error code: %s): Get Device Info.\n", + piglit_cl_get_error_name(errNo)); + return PIGLIT_FAIL; + } + + 
built_in_prog = clCreateProgramWithBuiltInKernels(env->context->cl_ctx, 1, &env->device_id, built_in_kernel_names, &errNo); + if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { + fprintf(stderr, + "Failed (error code: %s): Create BuiltIn Program.\n", + piglit_cl_get_error_name(errNo)); + return PIGLIT_FAIL; + } + + kernel_name = strtok(built_in_kernel_names, ";"); + + built_in_kernel = clCreateKernel(built_in_prog, kernel_name, &errNo); + if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { + fprintf(stderr, + "Failed (error code: %s): Create kernel.\n", + piglit_cl_get_error_name(errNo)); + return PIGLIT_FAIL; + } + free(built_in_kernel_names); + } + /*** Normal usage ***/ for(i = 0; i < num_kernel_work_group_infos; i++) { printf("%s ", piglit_cl_get_enum_name(kernel_work_group_infos[i])); + //use builtin kernel to test CL_KERNEL_GLOBAL_WORK_SIZE. swap the dummy kernel and builtin_kernel. + if(kernel_work_group_infos[i] == CL_KERNEL_GLOBAL_WORK_SIZE){ + if(built_in_kernel != NULL) { + temp_kernel = kernel; + kernel = built_in_kernel; + built_in_kernel = temp_kernel; + } + } + errNo = clGetKernelWorkGroupInfo(kernel, env->device_id, kernel_work_group_infos[i], @@ -114,6 +165,13 @@ piglit_cl_test(const int argc, piglit_merge_result(&result, PIGLIT_FAIL); } + if(kernel_work_group_infos[i] == CL_KERNEL_GLOBAL_WORK_SIZE){ + if(built_in_kernel != NULL) { + temp_kernel = kernel; + kernel = built_in_kernel; + built_in_kernel = temp_k
[Beignet] [PATCH] fix CL_KERNEL_GLOBAL_WORK_SIZE bug.
From: Luo the option CL_KERNEL_GLOBAL_WORK_SIZE for clGetKernelWorkGroupInfo should call built in kernel or custom device according to the spec, this patch calls the built in kernel to query the GLOBAL_WORK_SIZE. v2: use built in kernel to query the GLOBAL_WORK_SIZE if exist, dummy kernel for other options, handle the case when no built in kernel is provided. v3: fix indent issue; loop CL_KERNEL_GLOBAL_WORK_SIZE out, test it with the platform supports opencl-1.2. Signed-off-by: Luo --- tests/cl/api/get-kernel-work-group-info.c | 127 + 1 file changed, 127 insertions(+) diff --git a/tests/cl/api/get-kernel-work-group-info.c b/tests/cl/api/get-kernel-work-group-info.c index 47d09da..f3fd6e5 100644 --- a/tests/cl/api/get-kernel-work-group-info.c +++ b/tests/cl/api/get-kernel-work-group-info.c @@ -61,6 +61,11 @@ piglit_cl_test(const int argc, int i; cl_int errNo; cl_kernel kernel; +#ifdef CL_VERSION_1_2 + cl_program built_in_prog = NULL; + cl_kernel built_in_kernel = NULL; + size_t built_in_kernels_size; +#endif size_t param_value_size; void* param_value; @@ -84,6 +89,17 @@ piglit_cl_test(const int argc, for(i = 0; i < num_kernel_work_group_infos; i++) { printf("%s ", piglit_cl_get_enum_name(kernel_work_group_infos[i])); +#ifdef CL_VERSION_1_2 + if(kernel_work_group_infos[i] == CL_KERNEL_GLOBAL_WORK_SIZE){ + if(env->version >= 12) { + continue; + }else{ + fprintf(stderr, "Could not query CL_KERNEL_GLOBAL_WORK_SIZE. Piglit was compiled against OpenCL version >= 1.2 and cannot run this test for versions < 1.2 because CL_KERNEL_GLOBAL_WORK_SIZE option is not present.\n"); + piglit_merge_result(&result, PIGLIT_FAIL); + } + } +#endif + errNo = clGetKernelWorkGroupInfo(kernel, env->device_id, kernel_work_group_infos[i], @@ -187,6 +203,117 @@ piglit_cl_test(const int argc, piglit_merge_result(&result, PIGLIT_FAIL); } +#ifdef CL_VERSION_1_2 + if(env->version < 12){ + fprintf(stderr, "Could not query CL_KERNEL_GLOBAL_WORK_SIZE. 
Piglit was compiled against OpenCL version >= 1.2 and cannot run this test for versions < 1.2 because CL_KERNEL_GLOBAL_WORK_SIZE option is not present.\n"); + piglit_merge_result(&result, PIGLIT_FAIL); + } + + //use builtin kernel to test CL_KERNEL_GLOBAL_WORK_SIZE. + errNo = clGetDeviceInfo(env->device_id, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, &built_in_kernels_size); + if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { + fprintf(stderr, + "Failed (error code: %s): Get Device Info.\n", + piglit_cl_get_error_name(errNo)); + piglit_merge_result(&result, PIGLIT_FAIL); + } + + if(built_in_kernels_size != 0) + { + char* built_in_kernel_names; + char* kernel_name; + size_t ret_sz; + built_in_kernel_names = (char* )malloc(built_in_kernels_size * sizeof(char) ); + + errNo = clGetDeviceInfo(env->device_id, CL_DEVICE_BUILT_IN_KERNELS, built_in_kernels_size, (void*)built_in_kernel_names, &ret_sz); + if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { + fprintf(stderr, + "Failed (error code: %s): Get Device Info.\n", + piglit_cl_get_error_name(errNo)); + piglit_merge_result(&result, PIGLIT_FAIL); + } + + built_in_prog = clCreateProgramWithBuiltInKernels(env->context->cl_ctx, 1, &env->device_id, built_in_kernel_names, &errNo); + if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { + fprintf(stderr, + "Failed (error code: %s): Create BuiltIn Program.\n", + piglit_cl_get_error_name(errNo)); + piglit_merge_result(&result, PIGLIT_FAIL); + } + + kernel_name = strtok(built_in_kernel_names, ";"); + + built_in_kernel = clCreateKernel(built_in_prog, kernel_name, &errNo); + if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { + fprintf(stderr, + "Failed (error code: %s): Create kernel.\n", + piglit_cl_get_error_name(errNo)); + piglit_merge_result(&result, PIGLIT_FAIL); + } + free(built_in_kernel_names); + /* +* CL_INVALID_VALUE if kernel is not a built in kernel. +*/ + errNo = clGetKernelWorkGroupInfo(kernel, +
[Beignet] [PATCH] use global flag 0.0 to control unstructured simple block.
From: Luo Xionghu filter the simple block out and replace the if/endif with global flag to control. Signed-off-by: Luo Xionghu --- backend/src/backend/gen_insn_selection.cpp | 50 backend/src/backend/gen_insn_selection.hpp |1 + backend/src/backend/gen_reg_allocation.cpp |3 +- 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index f284ae1..e3547c6 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -217,7 +217,7 @@ namespace gbe // SelectionBlock /// - SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb), isLargeBlock(false), endifLabel( (ir::LabelIndex) 0){} + SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb), isLargeBlock(false), endifLabel( (ir::LabelIndex) 0), removeSimpleIfEndif(false){} void SelectionBlock::append(ir::Register reg) { tmp.push_back(reg); } @@ -403,6 +403,8 @@ namespace gbe uint32_t buildBasicBlockDAG(const ir::BasicBlock &bb); /*! Perform the selection on the basic block */ void matchBasicBlock(const ir::BasicBlock &bb, uint32_t insnNum); +/*! a simple block can use predication instead of if/endif*/ +bool isSimpleBlock(const ir::BasicBlock &bb, uint32_t insnNum); /*! 
A root instruction needs to be generated */ bool isRoot(const ir::Instruction &insn) const; @@ -1471,6 +1473,26 @@ namespace gbe return false; } + bool Selection::Opaque::isSimpleBlock(const ir::BasicBlock &bb, uint32_t insnNum) { +for (int32_t insnID = insnNum-1; insnID >= 0; --insnID) { + SelectionDAG &dag = *insnDAG[insnID]; + const ir::Instruction& insn = dag.insn; + if(insn.isMemberOf() || + insn.isMemberOf() || + insn.getOpcode() == ir::OP_SIMD_ANY || + insn.getOpcode() == ir::OP_SIMD_ALL || + insn.getOpcode() == ir::OP_ELSE) +return false; +} + +if(!(insnDAG[insnNum-1]->insn.isMemberOf()) || +insnDAG[insnNum-1]->insn.getOpcode() == ir::OP_ENDIF) + return false; + +return true; + } + + uint32_t Selection::Opaque::buildBasicBlockDAG(const ir::BasicBlock &bb) { using namespace ir; @@ -1551,7 +1573,9 @@ namespace gbe // Bottom up code generation bool needEndif = this->block->hasBranch == false && !this->block->hasBarrier; needEndif = needEndif && bb.needEndif; -if (needEndif) { +this->block->removeSimpleIfEndif = insnNum < 5 && isSimpleBlock(bb, insnNum); +//this->block->removeSimpleIfEndif = false;//this->block->removeSimpleIfEndif && needEndif; +if (needEndif && !this->block->removeSimpleIfEndif) { if(!bb.needIf) // this basic block is the exit of a structure this->ENDIF(GenRegister::immd(0), bb.endifLabel, bb.endifLabel); else { @@ -1572,6 +1596,12 @@ namespace gbe // Start a new code fragment this->startBackwardGeneration(); + +if(this->block->removeSimpleIfEndif){ + this->curr.predicate = GEN_PREDICATE_NORMAL; + this->curr.flag = 0; + this->curr.subFlag = 0; +} // If there is no branch at the end of this block. 
// Try all the patterns from best to worst @@ -1581,6 +1611,12 @@ namespace gbe ++it; } while (it != end); GBE_ASSERT(it != end); + +if(this->block->removeSimpleIfEndif){ + this->curr.predicate = GEN_PREDICATE_NONE; + this->curr.flag = 0; + this->curr.subFlag = 0; +} // If we are in if/endif fix mode, and this block is // large enough, we need to insert endif/if pair to eliminate // the too long if/endif block. @@ -3808,7 +3844,8 @@ namespace gbe sel.JMPI(GenRegister::immd(0), jip, label); sel.pop(); } -sel.push(); +if(!sel.block->removeSimpleIfEndif){ + sel.push(); sel.curr.predicate = GEN_PREDICATE_NORMAL; if(!insn.getParent()->needEndif && insn.getParent()->needIf) { ir::LabelIndex label = insn.getParent()->endifLabel; @@ -3816,7 +3853,8 @@ namespace gbe } else sel.IF(GenRegister::immd(0), sel.block->endifLabel, sel.block->endifLabel); -sel.pop(); + sel.pop(); +} } return true; @@ -4077,7 +4115,7 @@ namespace gbe sel.curr.predicate = GEN_PREDICATE_NORMAL; sel.MOV(ip, GenRegister::immuw(uint16_t(dst))); sel.curr.predicate = GEN_PREDICATE_NONE; - if (!sel.block->hasBarrier) + if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) sel.ENDIF(GenRegister::immd(0), nextLabel); sel.block->endifOffset = -1; sel.pop(); @@ -4087,7 +4125,7 @@ namespace gbe
[Beignet] [PATCH 2/2] add utest popcount_int and popcount_short.
From: Luo Xionghu Signed-off-by: Luo Xionghu --- kernels/compiler_popcount_int.cl |4 kernels/compiler_popcount_short.cl |4 utests/CMakeLists.txt |2 ++ utests/compiler_popcount_int.cpp | 32 utests/compiler_popcount_short.cpp | 32 5 files changed, 74 insertions(+) create mode 100644 kernels/compiler_popcount_int.cl create mode 100644 kernels/compiler_popcount_short.cl create mode 100644 utests/compiler_popcount_int.cpp create mode 100644 utests/compiler_popcount_short.cpp diff --git a/kernels/compiler_popcount_int.cl b/kernels/compiler_popcount_int.cl new file mode 100644 index 000..b972dbc --- /dev/null +++ b/kernels/compiler_popcount_int.cl @@ -0,0 +1,4 @@ +kernel void compiler_popcount_int(global int *src, global int *dst) { + int i = get_global_id(0); + dst[i] = popcount(src[i]); +} diff --git a/kernels/compiler_popcount_short.cl b/kernels/compiler_popcount_short.cl new file mode 100644 index 000..e4204c5 --- /dev/null +++ b/kernels/compiler_popcount_short.cl @@ -0,0 +1,4 @@ +kernel void compiler_popcount_short(global short *src, global short *dst) { + int i = get_global_id(0); + dst[i] = popcount(src[i]); +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index b45ecf9..2fe6243 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -41,6 +41,8 @@ set (utests_sources compiler_ceil.cpp compiler_clz_short.cpp compiler_clz_int.cpp + compiler_popcount_short.cpp + compiler_popcount_int.cpp compiler_convert_uchar_sat.cpp compiler_copy_buffer.cpp compiler_copy_image.cpp diff --git a/utests/compiler_popcount_int.cpp b/utests/compiler_popcount_int.cpp new file mode 100644 index 000..a3f675e --- /dev/null +++ b/utests/compiler_popcount_int.cpp @@ -0,0 +1,32 @@ +#include "utest_helper.hpp" + +void compiler_popcount_int(void) +{ + const int n = 32; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_popcount_int"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); + OCL_SET_ARG(0, 
sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = 16; + + OCL_MAP_BUFFER(0); + ((int*)buf_data[0])[0] = 0; + for (int32_t i = 1; i < (int32_t) n; ++i) +((int*)buf_data[0])[i] = 0xu >> i; + OCL_UNMAP_BUFFER(0); + + OCL_NDRANGE(1); + + OCL_MAP_BUFFER(1); + OCL_ASSERT(((int*)buf_data[1])[0] == 0); + for (int i = 1; i < n; ++i){ +OCL_ASSERT(((int*)buf_data[1])[i] == n-i); + } + OCL_UNMAP_BUFFER(1); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_popcount_int); diff --git a/utests/compiler_popcount_short.cpp b/utests/compiler_popcount_short.cpp new file mode 100644 index 000..7aa1ebf --- /dev/null +++ b/utests/compiler_popcount_short.cpp @@ -0,0 +1,32 @@ +#include "utest_helper.hpp" + +void compiler_popcount_short(void) +{ + const int n = 16; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_popcount_short"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(short), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = 16; + + OCL_MAP_BUFFER(0); + ((short*)buf_data[0])[0] = 0; + for (int32_t i = 1; i < (int32_t) n; ++i) +((short*)buf_data[0])[i] = 0xu >> i; + OCL_UNMAP_BUFFER(0); + + OCL_NDRANGE(1); + + OCL_MAP_BUFFER(1); + OCL_ASSERT(((short*)buf_data[1])[0] == 0); + for (int i = 1; i < n; ++i){ +OCL_ASSERT(((short*)buf_data[1])[i] == short(n-i) ); + } + OCL_UNMAP_BUFFER(1); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_popcount_short); -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 1/2] add opencl-1.2 builtin function popcount.
From: Luo the popcount function returns the number of non-zero bits in input. use GEN instruction cbit(Count Bits Set) to implement it. Signed-off-by: Luo Xionghu --- backend/src/backend/gen/gen_mesa_disasm.c |1 + backend/src/backend/gen_context.cpp |1 + backend/src/backend/gen_defs.hpp|1 + backend/src/backend/gen_encoder.cpp |1 + backend/src/backend/gen_encoder.hpp |1 + backend/src/backend/gen_insn_selection.cpp |4 +++- backend/src/backend/gen_insn_selection.hxx |1 + backend/src/ir/instruction.cpp |1 + backend/src/ir/instruction.hpp |2 ++ backend/src/ir/instruction.hxx |1 + backend/src/libocl/script/ocl_integer.def |3 +-- backend/src/libocl/tmpl/ocl_integer.tmpl.cl | 30 +++ backend/src/libocl/tmpl/ocl_integer.tmpl.h |9 backend/src/llvm/llvm_gen_backend.cpp |2 ++ backend/src/llvm/llvm_gen_ocl_function.hxx |1 + 15 files changed, 56 insertions(+), 3 deletions(-) diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c index 266b501..330dffb 100644 --- a/backend/src/backend/gen/gen_mesa_disasm.c +++ b/backend/src/backend/gen/gen_mesa_disasm.c @@ -66,6 +66,7 @@ static const struct { [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 }, + [GEN_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 }, diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 8844233..c37d2ee 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -205,6 +205,7 @@ namespace gbe case SEL_OP_READ_ARF: p->MOV(dst, src); break; case SEL_OP_FBH: p->FBH(dst, src); break; case SEL_OP_FBL: p->FBL(dst, src); break; + case SEL_OP_CBIT: p->CBIT(dst, src); break; case SEL_OP_NOT: p->NOT(dst, src); break; case SEL_OP_RNDD: 
p->RNDD(dst, src); break; case SEL_OP_RNDU: p->RNDU(dst, src); break; diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp index 19aad95..3faacde 100644 --- a/backend/src/backend/gen_defs.hpp +++ b/backend/src/backend/gen_defs.hpp @@ -159,6 +159,7 @@ enum opcode { GEN_OPCODE_LZD = 74, GEN_OPCODE_FBH = 75, GEN_OPCODE_FBL = 76, + GEN_OPCODE_CBIT = 77, GEN_OPCODE_ADDC = 78, GEN_OPCODE_SUBB = 79, GEN_OPCODE_SAD2 = 80, diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 295e11d..bd6204a 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -901,6 +901,7 @@ namespace gbe ALU1(RNDU) ALU1(FBH) ALU1(FBL) + ALU1(CBIT) ALU1(F16TO32) ALU1(F32TO16) ALU2(SEL) diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index 2c999ce..3f486d7 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -101,6 +101,7 @@ namespace gbe ALU1(MOV) ALU1(FBH) ALU1(FBL) +ALU1(CBIT) ALU2(SUBB) ALU2(UPSAMPLE_SHORT) ALU2(UPSAMPLE_INT) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index e3ee35d..e05a0a6 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -493,6 +493,7 @@ namespace gbe ALU2WithTemp(MUL_HI) ALU1(FBH) ALU1(FBL) +ALU1(CBIT) ALU2WithTemp(HADD) ALU2WithTemp(RHADD) ALU2(UPSAMPLE_SHORT) @@ -1861,7 +1862,7 @@ namespace gbe static ir::Type getType(const ir::Opcode opcode, const ir::Type insnType) { if (insnType == ir::TYPE_S64 || insnType == ir::TYPE_U64 || insnType == ir::TYPE_S8 || insnType == ir::TYPE_U8) return insnType; - if (opcode == ir::OP_FBH || opcode == ir::OP_FBL) + if (opcode == ir::OP_FBH || opcode == ir::OP_FBL || opcode == ir::OP_CBIT) return ir::TYPE_U32; if (insnType == ir::TYPE_S16 || insnType == ir::TYPE_U16) return insnType; @@ -1915,6 +1916,7 @@ namespace gbe case ir::OP_RNDZ: 
sel.RNDZ(dst, src); break; case ir::OP_FBH: sel.FBH(dst, src); break; case ir::OP_FBL: sel.FBL(dst, src); break; + case ir::OP_CBIT: sel.CBIT(dst, src); break; case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break; case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break; case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break; diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 7511b84..d80dc58 100644 --- a/backend/src/b
[Beignet] [PATCH v2 2/2] add utest popcount for all types.
From: Luo Xionghu v2: add all types to test. Signed-off-by: Luo Xionghu --- kernels/compiler_popcount.cl | 16 + utests/CMakeLists.txt|1 + utests/compiler_popcount.cpp | 75 ++ 3 files changed, 92 insertions(+) create mode 100644 kernels/compiler_popcount.cl create mode 100644 utests/compiler_popcount.cpp diff --git a/kernels/compiler_popcount.cl b/kernels/compiler_popcount.cl new file mode 100644 index 000..1636118 --- /dev/null +++ b/kernels/compiler_popcount.cl @@ -0,0 +1,16 @@ +#define TEST_TYPE(TYPE) \ +kernel void test_##TYPE(global TYPE *src, global TYPE *dst) { \ + int i = get_global_id(0); \ + dst[i] = popcount(src[i]); \ +} + +TEST_TYPE(char) +TEST_TYPE(uchar) +TEST_TYPE(short) +TEST_TYPE(ushort) +TEST_TYPE(int) +TEST_TYPE(uint) +TEST_TYPE(long) +TEST_TYPE(ulong) + +#undef TEST_TYPE diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index b45ecf9..1b8caca 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -41,6 +41,7 @@ set (utests_sources compiler_ceil.cpp compiler_clz_short.cpp compiler_clz_int.cpp + compiler_popcount.cpp compiler_convert_uchar_sat.cpp compiler_copy_buffer.cpp compiler_copy_image.cpp diff --git a/utests/compiler_popcount.cpp b/utests/compiler_popcount.cpp new file mode 100644 index 000..0658e1b --- /dev/null +++ b/utests/compiler_popcount.cpp @@ -0,0 +1,75 @@ +#include "utest_helper.hpp" + +namespace { + +template +T get_max(); + +#define DEF_TEMPLATE(TYPE, NAME)\ +template <> \ +TYPE get_max()\ +{ \ + static TYPE max = CL_##NAME##_MAX;\ + return max; \ +} \ +\ +template <> \ +u##TYPE get_max() \ +{ \ + static u##TYPE max = CL_U##NAME##_MAX;\ + return max; \ +} + +DEF_TEMPLATE(int8_t, CHAR) +DEF_TEMPLATE(int16_t, SHRT) +DEF_TEMPLATE(int32_t, INT) +DEF_TEMPLATE(int64_t, LONG) + +template +void test(const char *kernel_name) +{ + const int n = sizeof(T) * 8; + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_popcount", kernel_name); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); + 
OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = n; + + OCL_MAP_BUFFER(0); + ((T*)buf_data[0])[0] = 0; + for (int32_t i = 1; i < (int32_t) n; ++i){ +((T*)buf_data[0])[i] = get_max() >> i; + } + OCL_UNMAP_BUFFER(0); + + OCL_NDRANGE(1); + + OCL_MAP_BUFFER(1); + OCL_ASSERT(((T*)buf_data[1])[0] == 0); + for (int i = 1; i < n; ++i){ +OCL_ASSERT(((T*)buf_data[1])[i] == n-i); + } + OCL_UNMAP_BUFFER(1); +} + +} + +#define compiler_popcount(type, kernel) \ +static void compiler_popcount_ ##type(void)\ +{\ + test(# kernel);\ +}\ +MAKE_UTEST_FROM_FUNCTION(compiler_popcount_ ## type); + +compiler_popcount(int8_t, test_char) +compiler_popcount(uint8_t, test_uchar) +compiler_popcount(int16_t, test_short) +compiler_popcount(uint16_t, test_ushort) +compiler_popcount(int32_t, test_int) +compiler_popcount(uint32_t, test_uint) +compiler_popcount(int64_t, test_long) +compiler_popcount(uint64_t, test_ulong) -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH v3 2/2] add utest popcount for all types.
From: Luo Xionghu v2: add all types to test. v3: fix signed type count bits error. Signed-off-by: Luo Xionghu --- kernels/compiler_popcount.cl | 16 + utests/CMakeLists.txt|1 + utests/compiler_popcount.cpp | 75 ++ 3 files changed, 92 insertions(+) create mode 100644 kernels/compiler_popcount.cl create mode 100644 utests/compiler_popcount.cpp diff --git a/kernels/compiler_popcount.cl b/kernels/compiler_popcount.cl new file mode 100644 index 000..1636118 --- /dev/null +++ b/kernels/compiler_popcount.cl @@ -0,0 +1,16 @@ +#define TEST_TYPE(TYPE) \ +kernel void test_##TYPE(global TYPE *src, global TYPE *dst) { \ + int i = get_global_id(0); \ + dst[i] = popcount(src[i]); \ +} + +TEST_TYPE(char) +TEST_TYPE(uchar) +TEST_TYPE(short) +TEST_TYPE(ushort) +TEST_TYPE(int) +TEST_TYPE(uint) +TEST_TYPE(long) +TEST_TYPE(ulong) + +#undef TEST_TYPE diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index b45ecf9..1b8caca 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -41,6 +41,7 @@ set (utests_sources compiler_ceil.cpp compiler_clz_short.cpp compiler_clz_int.cpp + compiler_popcount.cpp compiler_convert_uchar_sat.cpp compiler_copy_buffer.cpp compiler_copy_image.cpp diff --git a/utests/compiler_popcount.cpp b/utests/compiler_popcount.cpp new file mode 100644 index 000..c960ae6 --- /dev/null +++ b/utests/compiler_popcount.cpp @@ -0,0 +1,75 @@ +#include "utest_helper.hpp" + +namespace { + +template +T get_max(); + +#define DEF_TEMPLATE(TYPE, NAME)\ +template <> \ +TYPE get_max()\ +{ \ + static TYPE max = CL_##NAME##_MAX;\ + return max; \ +} \ +\ +template <> \ +u##TYPE get_max() \ +{ \ + static u##TYPE max = CL_U##NAME##_MAX;\ + return max; \ +} + +DEF_TEMPLATE(int8_t, CHAR) +DEF_TEMPLATE(int16_t, SHRT) +DEF_TEMPLATE(int32_t, INT) +DEF_TEMPLATE(int64_t, LONG) + +template +void test(const char *kernel_name, int s_type) +{ + const int n = sizeof(T) * 8; + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_popcount", kernel_name); + 
OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = n; + + OCL_MAP_BUFFER(0); + ((T*)buf_data[0])[0] = 0; + for (int32_t i = 1; i < (int32_t) n; ++i){ +((T*)buf_data[0])[i] = get_max() >> i; + } + OCL_UNMAP_BUFFER(0); + + OCL_NDRANGE(1); + + OCL_MAP_BUFFER(1); + OCL_ASSERT(((T*)buf_data[1])[0] == 0); + for (int i = 1; i < n; ++i){ +OCL_ASSERT(((T*)buf_data[1])[i] == n-i-s_type); + } + OCL_UNMAP_BUFFER(1); +} + +} + +#define compiler_popcount(type, kernel, s_type) \ +static void compiler_popcount_ ##type(void)\ +{\ + test(# kernel, s_type);\ +}\ +MAKE_UTEST_FROM_FUNCTION(compiler_popcount_ ## type); + +compiler_popcount(int8_t, test_char, 1) +compiler_popcount(uint8_t, test_uchar, 0) +compiler_popcount(int16_t, test_short, 1) +compiler_popcount(uint16_t, test_ushort, 0) +compiler_popcount(int32_t, test_int, 1) +compiler_popcount(uint32_t, test_uint, 0) +compiler_popcount(int64_t, test_long, 1) +compiler_popcount(uint64_t, test_ulong, 0) -- 1.7.9.5 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH v2] use global flag 0.0 to control unstructured simple block.
From: Luo Xionghu filter the simple block out and replace the if/endif with global flag to control. v2: fix the luxmark sala performance regression due to extern flag in a BRA instruction. Signed-off-by: Luo Xionghu --- backend/src/backend/gen_insn_selection.cpp | 80 ++-- backend/src/backend/gen_insn_selection.hpp |1 + backend/src/backend/gen_reg_allocation.cpp |3 +- 3 files changed, 68 insertions(+), 16 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index b2df76f..f0fd494 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -217,7 +217,7 @@ namespace gbe // SelectionBlock /// - SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb), isLargeBlock(false), endifLabel( (ir::LabelIndex) 0){} + SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb), isLargeBlock(false), endifLabel( (ir::LabelIndex) 0), removeSimpleIfEndif(false){} void SelectionBlock::append(ir::Register reg) { tmp.push_back(reg); } @@ -405,6 +405,8 @@ namespace gbe uint32_t buildBasicBlockDAG(const ir::BasicBlock &bb); /*! Perform the selection on the basic block */ void matchBasicBlock(const ir::BasicBlock &bb, uint32_t insnNum); +/*! a simple block can use predication instead of if/endif*/ +bool isSimpleBlock(const ir::BasicBlock &bb, uint32_t insnNum); /*! 
A root instruction needs to be generated */ bool isRoot(const ir::Instruction &insn) const; @@ -1483,6 +1485,37 @@ namespace gbe return false; } + bool Selection::Opaque::isSimpleBlock(const ir::BasicBlock &bb, uint32_t insnNum) { + +if(bb.belongToStructure) + return false; + +for (int32_t insnID = insnNum-1; insnID >= 0; --insnID) { + SelectionDAG &dag = *insnDAG[insnID]; + const ir::Instruction& insn = dag.insn; + if(insn.isMemberOf() || + insn.isMemberOf() || + insn.getOpcode() == ir::OP_SIMD_ANY || + insn.getOpcode() == ir::OP_SIMD_ALL || + insn.getOpcode() == ir::OP_ELSE) +return false; +} + +// there would generate a extra CMP instruction for predicated BRA with extern flag, +// should retrun false to keep the if/endif. +if((insnDAG[insnNum-1]->insn.isMemberOf())){ + if (insnDAG[insnNum-1]->insn.getOpcode() == ir::OP_BRA) { +const ir::BranchInstruction &insn = ir::cast(insnDAG[insnNum-1]->insn); +if(insn.isPredicated() && insnDAG[insnNum-1]->child[0] == NULL){ + return false; +} + } +} + +return true; + } + + uint32_t Selection::Opaque::buildBasicBlockDAG(const ir::BasicBlock &bb) { using namespace ir; @@ -1563,7 +1596,8 @@ namespace gbe // Bottom up code generation bool needEndif = this->block->hasBranch == false && !this->block->hasBarrier; needEndif = needEndif && bb.needEndif; -if (needEndif) { +this->block->removeSimpleIfEndif = insnNum < 10 && isSimpleBlock(bb, insnNum); +if (needEndif && !this->block->removeSimpleIfEndif) { if(!bb.needIf) // this basic block is the exit of a structure this->ENDIF(GenRegister::immd(0), bb.endifLabel, bb.endifLabel); else { @@ -1584,6 +1618,13 @@ namespace gbe // Start a new code fragment this->startBackwardGeneration(); + +if(this->block->removeSimpleIfEndif){ + this->push(); +this->curr.predicate = GEN_PREDICATE_NORMAL; +this->curr.flag = 0; +this->curr.subFlag = 0; +} // If there is no branch at the end of this block. 
// Try all the patterns from best to worst @@ -1593,6 +1634,13 @@ namespace gbe ++it; } while (it != end); GBE_ASSERT(it != end); + +if(this->block->removeSimpleIfEndif){ +this->curr.predicate = GEN_PREDICATE_NONE; +this->curr.flag = 0; +this->curr.subFlag = 0; + this->pop(); +} // If we are in if/endif fix mode, and this block is // large enough, we need to insert endif/if pair to eliminate // the too long if/endif block. @@ -3836,15 +3884,17 @@ namespace gbe sel.JMPI(GenRegister::immd(0), jip, label); sel.pop(); } -sel.push(); - sel.curr.predicate = GEN_PREDICATE_NORMAL; - if(!insn.getParent()->needEndif && insn.getParent()->needIf) { -ir::LabelIndex label = insn.getParent()->endifLabel; -sel.IF(GenRegister::immd(0), label, label); - } - else -sel.IF(GenRegister::immd(0), sel.block->endifLabel, sel.block->endifLabel); -sel.pop(); +if(!sel.block->removeSimpleIfEndif){ + sel.push(); +sel.curr.predicate = GEN_PREDICATE_NORMAL; +if(!insn