[Beignet] [PATCH] refine the event implementation and fix some bugs.

2014-04-24 Thread xionghu . luo
From: Luo 

1. Remove a useless data element.
2. Fix some logic bugs.
3. Add an implementation of clEnqueueMarkerWithWaitList.
---
 src/cl_alloc.c |  1 +
 src/cl_api.c   | 22 +---
 src/cl_event.c | 65 --
 src/cl_event.h |  4 +++-
 4 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index 20d5578..93d2e6a 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -71,6 +71,7 @@ cl_free(void *ptr)
 return;
   atomic_dec(&cl_alloc_n);
   free(ptr);
+  ptr = NULL;
 }
 
 LOCAL size_t
diff --git a/src/cl_api.c b/src/cl_api.c
index 1543ff4..a8c4fbe 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2621,10 +2621,26 @@ clEnqueueNativeKernel(cl_command_queue   command_queue,
 error:
   return err;
 }
+clEnqueueMarker(cl_command_queue command_queue,
+cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  if(event == NULL) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+
+  cl_event_marker_with_wait_list(command_queue, 0, NULL, event);
+error:
+  return err;
+}
 
 cl_int
-clEnqueueMarker(cl_command_queue command_queue,
-cl_event *   event)
+clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event)
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
@@ -2633,7 +2649,7 @@ clEnqueueMarker(cl_command_queue command_queue,
 goto error;
   }
 
-  cl_event_marker(command_queue, event);
+  cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
 error:
   return err;
 }
diff --git a/src/cl_event.c b/src/cl_event.c
index 727ee1f..203bfc2 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -224,7 +224,7 @@ cl_int cl_event_wait_events(cl_uint 
num_events_in_wait_list, const cl_event *eve
 if((event_wait_list[i]->type == CL_COMMAND_USER) ||
(event_wait_list[i]->enqueue_cb &&
(event_wait_list[i]->enqueue_cb->wait_user_events != NULL))){
-  for(j=0; jgpgpu_event)
   cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
-cl_event_set_status(event_wait_list[i], CL_COMPLETE);  //Execute user's 
callback
+    cl_event_set_status(event_wait_list[i], CL_COMPLETE);  //Execute 
user's callback
   }
   return CL_ENQUEUE_EXECUTE_IMM;
 }
@@ -260,12 +260,14 @@ void cl_event_new_enqueue_callback(cl_event event,
   cl_int i;
   GET_QUEUE_THREAD_GPGPU(data->queue);
 
-  /* Allocate and inialize the structure itself */
+  /* Allocate and initialize the structure itself */
   TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
+#if 0
   cb->num_events = num_events_in_wait_list;
   TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, 
num_events_in_wait_list));
   for(i=0; iwait_list[i] = event_wait_list[i];
+#endif
   cb->event = event;
   cb->next = NULL;
   cb->wait_user_events = NULL;
@@ -276,12 +278,13 @@ void cl_event_new_enqueue_callback(cl_event event,
   node = queue->wait_events[i]->waits_head;
   if(node == NULL)
 queue->wait_events[i]->waits_head = cb;
-  else
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb)   //wait on dup user event
-  continue;
-node->next = cb;
+  else{
+  while((node != cb) && node->next)
+  node = node->next;
+  if(node == cb)   //wait on dup user event
+  continue;
+  node->next = cb;
+  }
 
   /* Insert the user event to enqueue_callback's wait_user_events */
   TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
@@ -291,7 +294,7 @@ void cl_event_new_enqueue_callback(cl_event event,
 }
   }
 
-  /* Find out all user events that events in event_wait_list wait */
+  /* Find out all user events that in event_wait_list wait */
   for(i=0; istatus <= CL_COMPLETE)
   continue;
@@ -319,21 +322,25 @@ void cl_event_new_enqueue_callback(cl_event event,
   while(user_events != NULL) {
 /* Insert the enqueue_callback to user event's  waits_tail */
 node = user_events->event->waits_head;
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb) {  //wait on dup user event
-  user_events = user_events->next;
-  continue;
+if(node == NULL)
+event_wait_list[i]->waits_head = cb;
+else{
+while((node != cb) && node->next)
+node = node->next;
+if(node == cb) {  //wait on dup user event
+user_events = user_events->next;
+continue;
+}
+node->next = cb;
 }
-node->next = cb;
 
 /* Insert the user event to enqueue_callback's wait_user_events */
 TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
 u_ev->event = user_events->event;
 u_ev->next = cb->wait_user_events;

[Beignet] [PATCH] add OpenCL 1.2 API clEnqueueMarkerWithWaitList.

2014-04-24 Thread xionghu . luo
From: Luo 

---
 src/cl_api.c   | 23 ---
 src/cl_event.c | 10 +-
 src/cl_event.h |  2 +-
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 1543ff4..0f62cb5 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2621,10 +2621,26 @@ clEnqueueNativeKernel(cl_command_queue   command_queue,
 error:
   return err;
 }
+clEnqueueMarker(cl_command_queue command_queue,
+cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  if(event == NULL) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+
+  cl_event_marker_with_wait_list(command_queue, 0, NULL, event);
+error:
+  return err;
+}
 
 cl_int
-clEnqueueMarker(cl_command_queue command_queue,
-cl_event *   event)
+clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event)
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
@@ -2632,8 +2648,9 @@ clEnqueueMarker(cl_command_queue command_queue,
 err = CL_INVALID_VALUE;
 goto error;
   }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, command_queue->ctx);
 
-  cl_event_marker(command_queue, event);
+  cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
 error:
   return err;
 }
diff --git a/src/cl_event.c b/src/cl_event.c
index 727ee1f..9d4e491 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -474,7 +474,10 @@ void cl_event_update_status(cl_event event)
 cl_event_set_status(event, CL_COMPLETE);
 }
 
-cl_int cl_event_marker(cl_command_queue queue, cl_event* event)
+cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event* event)
 {
   enqueue_data data;
 
@@ -482,6 +485,11 @@ cl_int cl_event_marker(cl_command_queue queue, cl_event* 
event)
   if(event == NULL)
 return CL_OUT_OF_HOST_MEMORY;
 
+  //insert the input events to queue
+  for(i=0; i0, the marker event need wait queue->wait_events
   if(queue->wait_events_num > 0) {
 data.type = EnqueueMarker;
diff --git a/src/cl_event.h b/src/cl_event.h
index 3c61110..5a78a8d 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int);
 /* Check and update event status */
 void cl_event_update_status(cl_event);
 /* Create the marker event */
-cl_int cl_event_marker(cl_command_queue, cl_event*);
+cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const 
cl_event *,  cl_event*);
 /* Do the event profiling */
 cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
 #endif /* __CL_EVENT_H__ */
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/2] add [opencl 1.2] API clEnqueueMarkerWithWaitList.

2014-04-28 Thread xionghu . luo
From: Luo 

---
 src/cl_api.c   | 23 ---
 src/cl_event.c | 11 ++-
 src/cl_event.h |  2 +-
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 1543ff4..0f62cb5 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2621,10 +2621,26 @@ clEnqueueNativeKernel(cl_command_queue   command_queue,
 error:
   return err;
 }
+clEnqueueMarker(cl_command_queue command_queue,
+cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  if(event == NULL) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+
+  cl_event_marker_with_wait_list(command_queue, 0, NULL, event);
+error:
+  return err;
+}
 
 cl_int
-clEnqueueMarker(cl_command_queue command_queue,
-cl_event *   event)
+clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event)
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
@@ -2632,8 +2648,9 @@ clEnqueueMarker(cl_command_queue command_queue,
 err = CL_INVALID_VALUE;
 goto error;
   }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, command_queue->ctx);
 
-  cl_event_marker(command_queue, event);
+  cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
 error:
   return err;
 }
diff --git a/src/cl_event.c b/src/cl_event.c
index 50ca134..1ad12a7 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -480,14 +480,23 @@ void cl_event_update_status(cl_event event)
 cl_event_set_status(event, CL_COMPLETE);
 }
 
-cl_int cl_event_marker(cl_command_queue queue, cl_event* event)
+cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event* event)
 {
   enqueue_data data;
+  cl_uint i = 0;
 
   *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
   if(event == NULL)
 return CL_OUT_OF_HOST_MEMORY;
 
+  //insert the input events to queue
+  for(i=0; i0, the marker event need wait queue->wait_events
   if(queue->wait_events_num > 0) {
 data.type = EnqueueMarker;
diff --git a/src/cl_event.h b/src/cl_event.h
index 3c61110..5a78a8d 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int);
 /* Check and update event status */
 void cl_event_update_status(cl_event);
 /* Create the marker event */
-cl_int cl_event_marker(cl_command_queue, cl_event*);
+cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const 
cl_event *,  cl_event*);
 /* Do the event profiling */
 cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
 #endif /* __CL_EVENT_H__ */
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/2] fix event related bugs.

2014-04-28 Thread xionghu . luo
From: Luo 

---
 src/cl_alloc.c |  1 +
 src/cl_event.c | 76 +++---
 2 files changed, 42 insertions(+), 35 deletions(-)

diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index 20d5578..93d2e6a 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -71,6 +71,7 @@ cl_free(void *ptr)
 return;
   atomic_dec(&cl_alloc_n);
   free(ptr);
+  ptr = NULL;
 }
 
 LOCAL size_t
diff --git a/src/cl_event.c b/src/cl_event.c
index 727ee1f..50ca134 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -260,12 +260,12 @@ void cl_event_new_enqueue_callback(cl_event event,
   cl_int i;
   GET_QUEUE_THREAD_GPGPU(data->queue);
 
-  /* Allocate and inialize the structure itself */
+  /* Allocate and initialize the structure itself */
   TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
   cb->num_events = num_events_in_wait_list;
   TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, 
num_events_in_wait_list));
   for(i=0; iwait_list[i] = event_wait_list[i];
+  cb->wait_list[i] = event_wait_list[i];
   cb->event = event;
   cb->next = NULL;
   cb->wait_user_events = NULL;
@@ -276,12 +276,13 @@ void cl_event_new_enqueue_callback(cl_event event,
   node = queue->wait_events[i]->waits_head;
   if(node == NULL)
 queue->wait_events[i]->waits_head = cb;
-  else
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb)   //wait on dup user event
-  continue;
-node->next = cb;
+  else{
+  while((node != cb) && node->next)
+  node = node->next;
+  if(node == cb)   //wait on dup user event
+  continue;
+  node->next = cb;
+  }
 
   /* Insert the user event to enqueue_callback's wait_user_events */
   TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
@@ -291,7 +292,7 @@ void cl_event_new_enqueue_callback(cl_event event,
 }
   }
 
-  /* Find out all user events that events in event_wait_list wait */
+  /* Find out all user events that in event_wait_list wait */
   for(i=0; istatus <= CL_COMPLETE)
   continue;
@@ -319,21 +320,25 @@ void cl_event_new_enqueue_callback(cl_event event,
   while(user_events != NULL) {
 /* Insert the enqueue_callback to user event's  waits_tail */
 node = user_events->event->waits_head;
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb) {  //wait on dup user event
-  user_events = user_events->next;
-  continue;
+if(node == NULL)
+event_wait_list[i]->waits_head = cb;
+else{
+while((node != cb) && node->next)
+node = node->next;
+if(node == cb) {  //wait on dup user event
+user_events = user_events->next;
+continue;
+}
+node->next = cb;
 }
-node->next = cb;
 
 /* Insert the user event to enqueue_callback's wait_user_events */
 TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
 u_ev->event = user_events->event;
 u_ev->next = cb->wait_user_events;
 cb->wait_user_events = u_ev;
+cl_command_queue_insert_event(event->queue, user_events->event);
 user_events = user_events->next;
-cl_command_queue_insert_event(event->queue, event_wait_list[i]);
   }
 }
   }
@@ -353,8 +358,6 @@ error:
   cb->wait_user_events = cb->wait_user_events->next;
   cl_free(u_ev);
 }
-if(cb->wait_list)
-  cl_free(cb->wait_list);
 cl_free(cb);
   }
   goto exit;
@@ -363,7 +366,7 @@ error:
 void cl_event_set_status(cl_event event, cl_int status)
 {
   user_callback *user_cb;
-  user_event*u_ev, *u_ev_next;
+  user_event*u_ev;
   cl_int ret, i;
   cl_event evt;
 
@@ -387,11 +390,11 @@ void cl_event_set_status(cl_event event, cl_int status)
 
   pthread_mutex_unlock(&event->ctx->event_lock);
   for(i=0; ienqueue_cb->num_events; i++)
-cl_event_delete(event->enqueue_cb->wait_list[i]);
+  cl_event_delete(event->enqueue_cb->wait_list[i]);
   pthread_mutex_lock(&event->ctx->event_lock);
 
   if(event->enqueue_cb->wait_list)
-cl_free(event->enqueue_cb->wait_list);
+  cl_free(event->enqueue_cb->wait_list);
   cl_free(event->enqueue_cb);
   event->enqueue_cb = NULL;
 }
@@ -419,22 +422,25 @@ void cl_event_set_status(cl_event event, cl_int status)
   /* Check all defer enqueue */
   enqueue_callback *cb, *enqueue_cb = event->waits_head;
   while(enqueue_cb) {
-/* Remove this user event in enqueue_cb */
-while(enqueue_cb->wait_user_events &&
-  enqueue_cb->wait_user_events->event == event) {
-  u_ev = enqueue_cb->wait_user_events;
-  enqueue_cb->wait_user_events = enqueue_cb->wait_user_events->next;
-  cl_free(u_ev);
-}
-
+/* Remove this user event in enqueue_cb, update the header if needed. */
 u_ev = enqueue_cb->wait_user_events;
+user_event * u_prev = N

[Beignet] [PATCH 1/2] [opencl 1.2] fix event related bugs.

2014-04-28 Thread xionghu . luo
From: Luo 

---
 src/cl_alloc.c |  1 +
 src/cl_event.c | 76 --
 2 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index 20d5578..93d2e6a 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -71,6 +71,7 @@ cl_free(void *ptr)
 return;
   atomic_dec(&cl_alloc_n);
   free(ptr);
+  ptr = NULL;
 }
 
 LOCAL size_t
diff --git a/src/cl_event.c b/src/cl_event.c
index 727ee1f..620c116 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -260,12 +260,12 @@ void cl_event_new_enqueue_callback(cl_event event,
   cl_int i;
   GET_QUEUE_THREAD_GPGPU(data->queue);
 
-  /* Allocate and inialize the structure itself */
+  /* Allocate and initialize the structure itself */
   TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
   cb->num_events = num_events_in_wait_list;
   TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, 
num_events_in_wait_list));
   for(i=0; iwait_list[i] = event_wait_list[i];
+  cb->wait_list[i] = event_wait_list[i];
   cb->event = event;
   cb->next = NULL;
   cb->wait_user_events = NULL;
@@ -276,12 +276,13 @@ void cl_event_new_enqueue_callback(cl_event event,
   node = queue->wait_events[i]->waits_head;
   if(node == NULL)
 queue->wait_events[i]->waits_head = cb;
-  else
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb)   //wait on dup user event
-  continue;
-node->next = cb;
+  else{
+  while((node != cb) && node->next)
+  node = node->next;
+  if(node == cb)   //wait on dup user event
+  continue;
+  node->next = cb;
+  }
 
   /* Insert the user event to enqueue_callback's wait_user_events */
   TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
@@ -291,7 +292,7 @@ void cl_event_new_enqueue_callback(cl_event event,
 }
   }
 
-  /* Find out all user events that events in event_wait_list wait */
+  /* Find out all user events that in event_wait_list wait */
   for(i=0; istatus <= CL_COMPLETE)
   continue;
@@ -319,21 +320,25 @@ void cl_event_new_enqueue_callback(cl_event event,
   while(user_events != NULL) {
 /* Insert the enqueue_callback to user event's  waits_tail */
 node = user_events->event->waits_head;
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb) {  //wait on dup user event
-  user_events = user_events->next;
-  continue;
+if(node == NULL)
+event_wait_list[i]->waits_head = cb;
+else{
+while((node != cb) && node->next)
+node = node->next;
+if(node == cb) {  //wait on dup user event
+user_events = user_events->next;
+continue;
+}
+node->next = cb;
 }
-node->next = cb;
 
 /* Insert the user event to enqueue_callback's wait_user_events */
 TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
 u_ev->event = user_events->event;
 u_ev->next = cb->wait_user_events;
 cb->wait_user_events = u_ev;
+cl_command_queue_insert_event(event->queue, user_events->event);
 user_events = user_events->next;
-cl_command_queue_insert_event(event->queue, event_wait_list[i]);
   }
 }
   }
@@ -354,7 +359,7 @@ error:
   cl_free(u_ev);
 }
 if(cb->wait_list)
-  cl_free(cb->wait_list);
+cl_free(cb->wait_list);
 cl_free(cb);
   }
   goto exit;
@@ -363,7 +368,7 @@ error:
 void cl_event_set_status(cl_event event, cl_int status)
 {
   user_callback *user_cb;
-  user_event*u_ev, *u_ev_next;
+  user_event*u_ev;
   cl_int ret, i;
   cl_event evt;
 
@@ -387,11 +392,11 @@ void cl_event_set_status(cl_event event, cl_int status)
 
   pthread_mutex_unlock(&event->ctx->event_lock);
   for(i=0; ienqueue_cb->num_events; i++)
-cl_event_delete(event->enqueue_cb->wait_list[i]);
+  cl_event_delete(event->enqueue_cb->wait_list[i]);
   pthread_mutex_lock(&event->ctx->event_lock);
 
   if(event->enqueue_cb->wait_list)
-cl_free(event->enqueue_cb->wait_list);
+  cl_free(event->enqueue_cb->wait_list);
   cl_free(event->enqueue_cb);
   event->enqueue_cb = NULL;
 }
@@ -419,22 +424,25 @@ void cl_event_set_status(cl_event event, cl_int status)
   /* Check all defer enqueue */
   enqueue_callback *cb, *enqueue_cb = event->waits_head;
   while(enqueue_cb) {
-/* Remove this user event in enqueue_cb */
-while(enqueue_cb->wait_user_events &&
-  enqueue_cb->wait_user_events->event == event) {
-  u_ev = enqueue_cb->wait_user_events;
-  enqueue_cb->wait_user_events = enqueue_cb->wait_user_events->next;
-  cl_free(u_ev);
-}
-
+/* Remove this user event in enqueue_cb, update the header if needed. */
 u_ev = enqueue_cb->wait_user_events;
+user_event * u_prev = NULL;
+user_event *tmp

[Beignet] [PATCH 2/2] add [opencl 1.2] API clEnqueueMarkerWithWaitList.

2014-04-28 Thread xionghu . luo
From: Luo 

---
 src/cl_api.c   | 25 ++---
 src/cl_event.c | 11 ++-
 src/cl_event.h |  2 +-
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 1543ff4..b5c42e7 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2623,8 +2623,8 @@ error:
 }
 
 cl_int
-clEnqueueMarker(cl_command_queue command_queue,
-cl_event *   event)
+clEnqueueMarker(cl_command_queue command_queue,
+cl_event *event)
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
@@ -2633,7 +2633,26 @@ clEnqueueMarker(cl_command_queue command_queue,
 goto error;
   }
 
-  cl_event_marker(command_queue, event);
+  cl_event_marker_with_wait_list(command_queue, 0, NULL, event);
+error:
+  return err;
+}
+
+cl_int
+clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  if(event == NULL) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, command_queue->ctx);
+
+  cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
 error:
   return err;
 }
diff --git a/src/cl_event.c b/src/cl_event.c
index 620c116..07260ac 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -482,14 +482,23 @@ void cl_event_update_status(cl_event event)
 cl_event_set_status(event, CL_COMPLETE);
 }
 
-cl_int cl_event_marker(cl_command_queue queue, cl_event* event)
+cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event* event)
 {
   enqueue_data data;
+  cl_uint i = 0;
 
   *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
   if(event == NULL)
 return CL_OUT_OF_HOST_MEMORY;
 
+  //insert the input events to queue
+  for(i=0; i0, the marker event need wait queue->wait_events
   if(queue->wait_events_num > 0) {
 data.type = EnqueueMarker;
diff --git a/src/cl_event.h b/src/cl_event.h
index 3c61110..5a78a8d 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int);
 /* Check and update event status */
 void cl_event_update_status(cl_event);
 /* Create the marker event */
-cl_int cl_event_marker(cl_command_queue, cl_event*);
+cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const 
cl_event *,  cl_event*);
 /* Do the event profiling */
 cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
 #endif /* __CL_EVENT_H__ */
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/2] add [opencl 1.2] API clEnqueueMarkerWithWaitList.

2014-04-28 Thread xionghu . luo
From: Luo 

---
 src/cl_api.c   | 25 ++---
 src/cl_event.c | 11 ++-
 src/cl_event.h |  2 +-
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 1543ff4..b5c42e7 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2623,8 +2623,8 @@ error:
 }
 
 cl_int
-clEnqueueMarker(cl_command_queue command_queue,
-cl_event *   event)
+clEnqueueMarker(cl_command_queue command_queue,
+cl_event *event)
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
@@ -2633,7 +2633,26 @@ clEnqueueMarker(cl_command_queue command_queue,
 goto error;
   }
 
-  cl_event_marker(command_queue, event);
+  cl_event_marker_with_wait_list(command_queue, 0, NULL, event);
+error:
+  return err;
+}
+
+cl_int
+clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  if(event == NULL) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, command_queue->ctx);
+
+  cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
 error:
   return err;
 }
diff --git a/src/cl_event.c b/src/cl_event.c
index 85e4041..46006ce 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -486,14 +486,23 @@ void cl_event_update_status(cl_event event)
 cl_event_set_status(event, CL_COMPLETE);
 }
 
-cl_int cl_event_marker(cl_command_queue queue, cl_event* event)
+cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event* event)
 {
   enqueue_data data;
+  cl_uint i = 0;
 
   *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
   if(event == NULL)
 return CL_OUT_OF_HOST_MEMORY;
 
+  //insert the input events to queue
+  for(i=0; i0, the marker event need wait queue->wait_events
   if(queue->wait_events_num > 0) {
 data.type = EnqueueMarker;
diff --git a/src/cl_event.h b/src/cl_event.h
index 3c61110..5a78a8d 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int);
 /* Check and update event status */
 void cl_event_update_status(cl_event);
 /* Create the marker event */
-cl_int cl_event_marker(cl_command_queue, cl_event*);
+cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const 
cl_event *,  cl_event*);
 /* Do the event profiling */
 cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
 #endif /* __CL_EVENT_H__ */
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/2] [opencl 1.2] fix event related bugs.

2014-04-28 Thread xionghu . luo
From: Luo 

---
 src/cl_alloc.c |  1 +
 src/cl_event.c | 80 +-
 2 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index 20d5578..93d2e6a 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -71,6 +71,7 @@ cl_free(void *ptr)
 return;
   atomic_dec(&cl_alloc_n);
   free(ptr);
+  ptr = NULL;
 }
 
 LOCAL size_t
diff --git a/src/cl_event.c b/src/cl_event.c
index 727ee1f..85e4041 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -260,12 +260,12 @@ void cl_event_new_enqueue_callback(cl_event event,
   cl_int i;
   GET_QUEUE_THREAD_GPGPU(data->queue);
 
-  /* Allocate and inialize the structure itself */
+  /* Allocate and initialize the structure itself */
   TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
   cb->num_events = num_events_in_wait_list;
   TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, 
num_events_in_wait_list));
   for(i=0; iwait_list[i] = event_wait_list[i];
+  cb->wait_list[i] = event_wait_list[i];
   cb->event = event;
   cb->next = NULL;
   cb->wait_user_events = NULL;
@@ -276,12 +276,13 @@ void cl_event_new_enqueue_callback(cl_event event,
   node = queue->wait_events[i]->waits_head;
   if(node == NULL)
 queue->wait_events[i]->waits_head = cb;
-  else
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb)   //wait on dup user event
-  continue;
-node->next = cb;
+  else{
+  while((node != cb) && node->next)
+  node = node->next;
+  if(node == cb)   //wait on dup user event
+  continue;
+  node->next = cb;
+  }
 
   /* Insert the user event to enqueue_callback's wait_user_events */
   TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
@@ -291,7 +292,7 @@ void cl_event_new_enqueue_callback(cl_event event,
 }
   }
 
-  /* Find out all user events that events in event_wait_list wait */
+  /* Find out all user events that in event_wait_list wait */
   for(i=0; istatus <= CL_COMPLETE)
   continue;
@@ -319,21 +320,25 @@ void cl_event_new_enqueue_callback(cl_event event,
   while(user_events != NULL) {
 /* Insert the enqueue_callback to user event's  waits_tail */
 node = user_events->event->waits_head;
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb) {  //wait on dup user event
-  user_events = user_events->next;
-  continue;
+if(node == NULL)
+event_wait_list[i]->waits_head = cb;
+else{
+while((node != cb) && node->next)
+node = node->next;
+if(node == cb) {  //wait on dup user event
+user_events = user_events->next;
+continue;
+}
+node->next = cb;
 }
-node->next = cb;
 
 /* Insert the user event to enqueue_callback's wait_user_events */
 TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
 u_ev->event = user_events->event;
 u_ev->next = cb->wait_user_events;
 cb->wait_user_events = u_ev;
+cl_command_queue_insert_event(event->queue, user_events->event);
 user_events = user_events->next;
-cl_command_queue_insert_event(event->queue, event_wait_list[i]);
   }
 }
   }
@@ -354,7 +359,7 @@ error:
   cl_free(u_ev);
 }
 if(cb->wait_list)
-  cl_free(cb->wait_list);
+cl_free(cb->wait_list);
 cl_free(cb);
   }
   goto exit;
@@ -363,7 +368,7 @@ error:
 void cl_event_set_status(cl_event event, cl_int status)
 {
   user_callback *user_cb;
-  user_event*u_ev, *u_ev_next;
+  user_event*u_ev;
   cl_int ret, i;
   cl_event evt;
 
@@ -387,11 +392,11 @@ void cl_event_set_status(cl_event event, cl_int status)
 
   pthread_mutex_unlock(&event->ctx->event_lock);
   for(i=0; ienqueue_cb->num_events; i++)
-cl_event_delete(event->enqueue_cb->wait_list[i]);
+  cl_event_delete(event->enqueue_cb->wait_list[i]);
   pthread_mutex_lock(&event->ctx->event_lock);
 
   if(event->enqueue_cb->wait_list)
-cl_free(event->enqueue_cb->wait_list);
+  cl_free(event->enqueue_cb->wait_list);
   cl_free(event->enqueue_cb);
   event->enqueue_cb = NULL;
 }
@@ -419,22 +424,29 @@ void cl_event_set_status(cl_event event, cl_int status)
   /* Check all defer enqueue */
   enqueue_callback *cb, *enqueue_cb = event->waits_head;
   while(enqueue_cb) {
-/* Remove this user event in enqueue_cb */
-while(enqueue_cb->wait_user_events &&
-  enqueue_cb->wait_user_events->event == event) {
-  u_ev = enqueue_cb->wait_user_events;
-  enqueue_cb->wait_user_events = enqueue_cb->wait_user_events->next;
-  cl_free(u_ev);
-}
-
+/* Remove this user event in enqueue_cb, update the header if needed. */
 u_ev = enqueue_cb->wait_user_events;
+user_event * u_prev = NULL;
+user_event *tmp

[Beignet] [Patch V2 1/2] [opencl 1.2] fix event related bugs.

2014-04-29 Thread xionghu . luo
From: Luo 

---
 src/cl_alloc.c |  1 +
 src/cl_event.c | 64 ++
 2 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index 20d5578..93d2e6a 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -71,6 +71,7 @@ cl_free(void *ptr)
 return;
   atomic_dec(&cl_alloc_n);
   free(ptr);
+  ptr = NULL;
 }
 
 LOCAL size_t
diff --git a/src/cl_event.c b/src/cl_event.c
index 727ee1f..9d21984 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -260,7 +260,7 @@ void cl_event_new_enqueue_callback(cl_event event,
   cl_int i;
   GET_QUEUE_THREAD_GPGPU(data->queue);
 
-  /* Allocate and inialize the structure itself */
+  /* Allocate and initialize the structure itself */
   TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
   cb->num_events = num_events_in_wait_list;
   TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, 
num_events_in_wait_list));
@@ -276,12 +276,13 @@ void cl_event_new_enqueue_callback(cl_event event,
   node = queue->wait_events[i]->waits_head;
   if(node == NULL)
 queue->wait_events[i]->waits_head = cb;
-  else
+  else{
 while((node != cb) && node->next)
   node = node->next;
 if(node == cb)   //wait on dup user event
   continue;
 node->next = cb;
+  }
 
   /* Insert the user event to enqueue_callback's wait_user_events */
   TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
@@ -291,7 +292,7 @@ void cl_event_new_enqueue_callback(cl_event event,
 }
   }
 
-  /* Find out all user events that events in event_wait_list wait */
+  /* Find out all user events that in event_wait_list wait */
   for(i=0; istatus <= CL_COMPLETE)
   continue;
@@ -319,21 +320,25 @@ void cl_event_new_enqueue_callback(cl_event event,
   while(user_events != NULL) {
 /* Insert the enqueue_callback to user event's  waits_tail */
 node = user_events->event->waits_head;
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb) {  //wait on dup user event
-  user_events = user_events->next;
-  continue;
+if(node == NULL)
+  event_wait_list[i]->waits_head = cb;
+else{
+  while((node != cb) && node->next)
+node = node->next;
+  if(node == cb) {  //wait on dup user event
+user_events = user_events->next;
+continue;
+  }
+  node->next = cb;
 }
-node->next = cb;
 
 /* Insert the user event to enqueue_callback's wait_user_events */
 TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
 u_ev->event = user_events->event;
 u_ev->next = cb->wait_user_events;
 cb->wait_user_events = u_ev;
+cl_command_queue_insert_event(event->queue, user_events->event);
 user_events = user_events->next;
-cl_command_queue_insert_event(event->queue, event_wait_list[i]);
   }
 }
   }
@@ -363,7 +368,7 @@ error:
 void cl_event_set_status(cl_event event, cl_int status)
 {
   user_callback *user_cb;
-  user_event*u_ev, *u_ev_next;
+  user_event*u_ev;
   cl_int ret, i;
   cl_event evt;
 
@@ -419,22 +424,29 @@ void cl_event_set_status(cl_event event, cl_int status)
   /* Check all defer enqueue */
   enqueue_callback *cb, *enqueue_cb = event->waits_head;
   while(enqueue_cb) {
-/* Remove this user event in enqueue_cb */
-while(enqueue_cb->wait_user_events &&
-  enqueue_cb->wait_user_events->event == event) {
-  u_ev = enqueue_cb->wait_user_events;
-  enqueue_cb->wait_user_events = enqueue_cb->wait_user_events->next;
-  cl_free(u_ev);
-}
-
+/* Remove this user event in enqueue_cb, update the header if needed. */
 u_ev = enqueue_cb->wait_user_events;
+user_event * u_prev = NULL;
+user_event *tmp =NULL;
 while(u_ev) {
-  u_ev_next = u_ev->next;
-  if(u_ev_next && u_ev_next->event == event) {
-u_ev->next = u_ev_next->next;
-cl_free(u_ev_next);
-  } else
-u_ev->next = u_ev_next;
+  if(u_ev && u_ev->event == event) {
+if(u_prev){
+  u_prev->next = u_ev->next;
+}
+tmp = u_ev;
+u_ev = u_ev->next;
+cl_free(tmp);
+  }else{
+if(!u_prev){
+  enqueue_cb->wait_user_events = u_ev;
+}
+u_prev=u_ev;
+u_ev = u_ev->next;
+  }
+}
+
+if(!u_prev){
+  enqueue_cb->wait_user_events = NULL;
 }
 
 /* Still wait on other user events */
@@ -448,7 +460,7 @@ void cl_event_set_status(cl_event event, cl_int status)
 
 /* All user events complete, now wait enqueue events */
 ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list,
-   enqueue_cb->event->queue);
+enqueue_cb->event->queue);
 ret = ret;
 assert(ret != CL_ENQUEUE_EXECUTE_DEFER);
 
-- 
1.8.1.2

___

[Beignet] [Patch V2 2/2] add [opencl 1.2] API clEnqueueMarkerWithWaitList.

2014-04-29 Thread xionghu . luo
From: Luo 

---
 src/cl_api.c   | 25 ++---
 src/cl_event.c | 20 +++-
 src/cl_event.h |  2 +-
 3 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 1543ff4..de67f01 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2623,8 +2623,8 @@ error:
 }
 
 cl_int
-clEnqueueMarker(cl_command_queue command_queue,
-cl_event *   event)
+clEnqueueMarker(cl_command_queue command_queue,
+cl_event *event)
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
@@ -2633,7 +2633,26 @@ clEnqueueMarker(cl_command_queue command_queue,
 goto error;
   }
 
-  cl_event_marker(command_queue, event);
+  cl_event_marker_with_wait_list(command_queue, 0, NULL, event);
+error:
+  return err;
+}
+
+cl_int
+clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  if(event == NULL) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, command_queue->ctx);
+
+  cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
 error:
   return err;
 }
diff --git a/src/cl_event.c b/src/cl_event.c
index 9d21984..54520d2 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -486,14 +486,32 @@ void cl_event_update_status(cl_event event)
 cl_event_set_status(event, CL_COMPLETE);
 }
 
-cl_int cl_event_marker(cl_command_queue queue, cl_event* event)
+cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event* event)
 {
   enqueue_data data;
+  cl_uint i = 0;
 
   *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
   if(event == NULL)
 return CL_OUT_OF_HOST_MEMORY;
 
+  //insert the input events to queue
+  for(i=0; itype==CL_COMMAND_USER) {
+  cl_command_queue_insert_event(queue, event_wait_list[i]);
+}else if(event_wait_list[i]->enqueue_cb != NULL) {
+  user_event* user_events = 
event_wait_list[i]->enqueue_cb->wait_user_events;
+
+  while(user_events != NULL) {
+cl_command_queue_insert_event(queue, user_events->event);
+user_events = user_events->next;
+  }
+}
+  }
+
   //if wait_events_num>0, the marker event need wait queue->wait_events
   if(queue->wait_events_num > 0) {
 data.type = EnqueueMarker;
diff --git a/src/cl_event.h b/src/cl_event.h
index 3c61110..5a78a8d 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int);
 /* Check and update event status */
 void cl_event_update_status(cl_event);
 /* Create the marker event */
-cl_int cl_event_marker(cl_command_queue, cl_event*);
+cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const 
cl_event *,  cl_event*);
 /* Do the event profiling */
 cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
 #endif /* __CL_EVENT_H__ */
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V3 2/2] add [opencl 1.2] API clEnqueueMarkerWithWaitList.

2014-04-29 Thread xionghu . luo
From: Luo 

---
 src/cl_api.c   | 25 ++---
 src/cl_event.c | 15 ---
 src/cl_event.h |  2 +-
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 1543ff4..de67f01 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2623,8 +2623,8 @@ error:
 }
 
 cl_int
-clEnqueueMarker(cl_command_queue command_queue,
-cl_event *   event)
+clEnqueueMarker(cl_command_queue command_queue,
+cl_event *event)
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
@@ -2633,7 +2633,26 @@ clEnqueueMarker(cl_command_queue command_queue,
 goto error;
   }
 
-  cl_event_marker(command_queue, event);
+  cl_event_marker_with_wait_list(command_queue, 0, NULL, event);
+error:
+  return err;
+}
+
+cl_int
+clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  if(event == NULL) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, command_queue->ctx);
+
+  cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
 error:
   return err;
 }
diff --git a/src/cl_event.c b/src/cl_event.c
index 9d21984..aa065c2 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -486,16 +486,25 @@ void cl_event_update_status(cl_event event)
 cl_event_set_status(event, CL_COMPLETE);
 }
 
-cl_int cl_event_marker(cl_command_queue queue, cl_event* event)
+cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event* event)
 {
   enqueue_data data;
+  cl_uint i = 0;
 
   *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
   if(event == NULL)
 return CL_OUT_OF_HOST_MEMORY;
 
-  //if wait_events_num>0, the marker event need wait queue->wait_events
-  if(queue->wait_events_num > 0) {
+  //enqueues a marker command which waits for either a list of events to 
complete, or if the list is
+  //empty it waits for all commands previously enqueued in command_queue to 
complete before it  completes.
+  if(num_events_in_wait_list > 0){
+data.type = EnqueueMarker;
+cl_event_new_enqueue_callback(*event, &data, num_events_in_wait_list, 
event_wait_list);
+return CL_SUCCESS;
+  } else if(queue->wait_events_num > 0) {
 data.type = EnqueueMarker;
 cl_event_new_enqueue_callback(*event, &data, queue->wait_events_num, 
queue->wait_events);
 return CL_SUCCESS;
diff --git a/src/cl_event.h b/src/cl_event.h
index 3c61110..5a78a8d 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int);
 /* Check and update event status */
 void cl_event_update_status(cl_event);
 /* Create the marker event */
-cl_int cl_event_marker(cl_command_queue, cl_event*);
+cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const 
cl_event *,  cl_event*);
 /* Do the event profiling */
 cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
 #endif /* __CL_EVENT_H__ */
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH v4 2/2] add [opencl-1.2] API clEnqueueMarkerWithWaitList.

2014-04-29 Thread xionghu . luo
From: Luo 

---
 src/cl_api.c   | 25 ++---
 src/cl_event.c | 14 +++---
 src/cl_event.h |  2 +-
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 1543ff4..de67f01 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2623,8 +2623,8 @@ error:
 }
 
 cl_int
-clEnqueueMarker(cl_command_queue command_queue,
-cl_event *   event)
+clEnqueueMarker(cl_command_queue command_queue,
+cl_event *event)
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
@@ -2633,7 +2633,26 @@ clEnqueueMarker(cl_command_queue command_queue,
 goto error;
   }
 
-  cl_event_marker(command_queue, event);
+  cl_event_marker_with_wait_list(command_queue, 0, NULL, event);
+error:
+  return err;
+}
+
+cl_int
+clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  if(event == NULL) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, command_queue->ctx);
+
+  cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
 error:
   return err;
 }
diff --git a/src/cl_event.c b/src/cl_event.c
index 9d21984..49dd423 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -486,7 +486,10 @@ void cl_event_update_status(cl_event event)
 cl_event_set_status(event, CL_COMPLETE);
 }
 
-cl_int cl_event_marker(cl_command_queue queue, cl_event* event)
+cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event* event)
 {
   enqueue_data data;
 
@@ -494,8 +497,13 @@ cl_int cl_event_marker(cl_command_queue queue, cl_event* 
event)
   if(event == NULL)
 return CL_OUT_OF_HOST_MEMORY;
 
-  //if wait_events_num>0, the marker event need wait queue->wait_events
-  if(queue->wait_events_num > 0) {
+  //enqueues a marker command which waits for either a list of events to 
complete, or if the list is
+  //empty it waits for all commands previously enqueued in command_queue to 
complete before it  completes.
+  if(num_events_in_wait_list > 0){
+data.type = EnqueueMarker;
+cl_event_new_enqueue_callback(*event, &data, num_events_in_wait_list, 
event_wait_list);
+return CL_SUCCESS;
+  } else if(queue->wait_events_num > 0) {
 data.type = EnqueueMarker;
 cl_event_new_enqueue_callback(*event, &data, queue->wait_events_num, 
queue->wait_events);
 return CL_SUCCESS;
diff --git a/src/cl_event.h b/src/cl_event.h
index 3c61110..5a78a8d 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -90,7 +90,7 @@ void cl_event_set_status(cl_event, cl_int);
 /* Check and update event status */
 void cl_event_update_status(cl_event);
 /* Create the marker event */
-cl_int cl_event_marker(cl_command_queue, cl_event*);
+cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const 
cl_event *,  cl_event*);
 /* Do the event profiling */
 cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
 #endif /* __CL_EVENT_H__ */
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/2] add test case barrier_list and marker_list.

2014-05-04 Thread xionghu . luo
From: Luo 


Signed-off-by: Luo 
---
 utests/CMakeLists.txt   |  2 ++
 utests/barrier_list.cpp | 75 +
 utests/marker_list.cpp  | 75 +
 3 files changed, 152 insertions(+)
 create mode 100644 utests/barrier_list.cpp
 create mode 100644 utests/marker_list.cpp

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 415dcb6..f9cc3f1 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -153,6 +153,8 @@ set (utests_sources
   runtime_createcontext.cpp
   runtime_null_kernel_arg.cpp
   runtime_event.cpp
+  barrier_list.cpp
+  marker_list.cpp
   compiler_double.cpp
   compiler_double_2.cpp
   compiler_double_3.cpp
diff --git a/utests/barrier_list.cpp b/utests/barrier_list.cpp
new file mode 100644
index 000..e672482
--- /dev/null
+++ b/utests/barrier_list.cpp
@@ -0,0 +1,75 @@
+#include "utest_helper.hpp"
+
+#define BUFFERSIZE  32*1024
+void barrier_list(void)
+{
+  const size_t n = BUFFERSIZE;
+  cl_int cpu_src[BUFFERSIZE];
+  cl_int cpu_src_2[BUFFERSIZE];
+  cl_event ev[5];
+  cl_int status = 0;
+  cl_int value = 34;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_event");
+  OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, BUFFERSIZE*sizeof(int), NULL);
+
+  for(cl_uint i=0; i= CL_SUBMITTED);
+  }
+
+
+  buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, 
BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL);
+
+  clEnqueueBarrierWithWaitList(queue, 0, NULL, &ev[3]);
+
+  clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, BUFFERSIZE*sizeof(int), 
(void *)cpu_src_2, 0, NULL, &ev[4]);
+
+  OCL_FINISH();
+  clGetEventInfo(ev[4], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+  OCL_ASSERT(status != CL_COMPLETE);
+
+  OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE);
+
+  clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+  OCL_ASSERT(status == CL_COMPLETE);
+
+  OCL_FINISH();
+
+  for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) {
+clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+OCL_ASSERT(status <= CL_COMPLETE);
+  }
+
+  for (uint32_t i = 0; i < n; ++i) {
+OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3);
+  }
+  clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL);
+
+  for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) {
+clReleaseEvent(ev[i]);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(barrier_list);
diff --git a/utests/marker_list.cpp b/utests/marker_list.cpp
new file mode 100644
index 000..cb4e749
--- /dev/null
+++ b/utests/marker_list.cpp
@@ -0,0 +1,75 @@
+#include "utest_helper.hpp"
+
+#define BUFFERSIZE  32*1024
+void marker_list(void)
+{
+  const size_t n = BUFFERSIZE;
+  cl_int cpu_src[BUFFERSIZE];
+  cl_int cpu_src_2[BUFFERSIZE];
+  cl_event ev[5];
+  cl_int status = 0;
+  cl_int value = 34;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_event");
+  OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, BUFFERSIZE*sizeof(int), NULL);
+
+  for(cl_uint i=0; i= CL_SUBMITTED);
+  }
+
+
+  buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, 
BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL);
+
+  clEnqueueMarkerWithWaitList(queue, 0, NULL, &ev[3]);
+
+  clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, BUFFERSIZE*sizeof(int), 
(void *)cpu_src_2, 0, NULL, &ev[4]);
+
+  OCL_FINISH();
+  clGetEventInfo(ev[4], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+  OCL_ASSERT(status == CL_COMPLETE);
+
+  OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE);
+
+  clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+  OCL_ASSERT(status == CL_COMPLETE);
+
+  OCL_FINISH();
+
+  for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) {
+clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+OCL_ASSERT(status <= CL_COMPLETE);
+  }
+
+  for (uint32_t i = 0; i < n; ++i) {
+OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3);
+  }
+  clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL);
+
+  for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) {
+clReleaseEvent(ev[i]);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(marker_list);
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/2] add [opencl-1.2] API clEnqueueBarrierWithWaitList.

2014-05-04 Thread xionghu . luo
From: Luo 

The previously pushed clEnqueueMarkerWithWaitList patch was not the latest
version, so it is updated in this patch.
The difference between clEnqueueMarkerWithWaitList and this API is that
this API blocks later enqueued calls, so a barrier_events queue is added
to manage the barrier's wait list.

Signed-off-by: Luo 
---
 src/cl_api.c   |  22 +++-
 src/cl_command_queue.c |  66 +--
 src/cl_command_queue.h |   9 ++--
 src/cl_enqueue.h   |   1 +
 src/cl_event.c | 144 +
 src/cl_event.h |   6 +++
 6 files changed, 206 insertions(+), 42 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 9c22819..8902665 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2753,8 +2753,28 @@ clEnqueueBarrier(cl_command_queue  command_queue)
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
-  cl_command_queue_set_barrier(command_queue);
 
+  cl_event_barrier_with_wait_list(command_queue, 0, NULL, NULL);
+
+error:
+  return err;
+}
+
+cl_int
+clEnqueueBarrierWithWaitList(cl_command_queue command_queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  if(event == NULL) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, command_queue->ctx);
+
+  cl_event_barrier_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
 error:
   return err;
 }
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 6a699c0..c754ad5 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -492,18 +492,74 @@ cl_command_queue_remove_event(cl_command_queue queue, 
cl_event event)
   if(i == queue->wait_events_num)
 return;
 
-  if(queue->barrier_index >= i)
-queue->barrier_index -= 1;
-
   for(; iwait_events_num-1; i++) {
 queue->wait_events[i] = queue->wait_events[i+1];
   }
   queue->wait_events_num -= 1;
 }
 
+#define DEFAULT_WAIT_EVENTS_SIZE  16
 LOCAL void
-cl_command_queue_set_barrier(cl_command_queue queue)
+cl_command_queue_insert_barrier_event(cl_command_queue queue, cl_event event)
 {
-queue->barrier_index = queue->wait_events_num;
+  cl_int i=0;
+  cl_event *new_list;
+
+  assert(queue != NULL);
+  if(queue->barrier_events == NULL) {
+queue->barrier_events_size = DEFAULT_WAIT_EVENTS_SIZE;
+TRY_ALLOC_NO_ERR (queue->barrier_events, CALLOC_ARRAY(cl_event, 
queue->barrier_events_size));
+  }
+
+  for(i=0; ibarrier_events_num; i++) {
+if(queue->barrier_events[i] == event)
+  return;   //is in the barrier_events, need to insert
+  }
+
+  if(queue->barrier_events_num < queue->barrier_events_size) {
+queue->barrier_events[queue->barrier_events_num++] = event;
+return;
+  }
+
+  //barrier_events_num == barrier_events_size, array is full
+  queue->barrier_events_size *= 2;
+  TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, 
queue->barrier_events_size));
+  memcpy(new_list, queue->barrier_events, 
sizeof(cl_event)*queue->barrier_events_num);
+  cl_free(queue->barrier_events);
+  queue->barrier_events = new_list;
+  queue->barrier_events[queue->barrier_events_num++] = event;
+  return;
+
+exit:
+  return;
+error:
+  if(queue->barrier_events)
+cl_free(queue->barrier_events);
+  queue->barrier_events = NULL;
+  queue->barrier_events_size = 0;
+  queue->barrier_events_num = 0;
+  goto exit;
+
 }
 
+LOCAL void
+cl_command_queue_remove_barrier_event(cl_command_queue queue, cl_event event)
+{
+  cl_int i=0;
+
+  if(queue->barrier_events_num == 0)
+return;
+
+  for(i=0; ibarrier_events_num; i++) {
+if(queue->barrier_events[i] == event)
+  break;
+  }
+
+  if(i == queue->barrier_events_num)
+return;
+
+  for(; ibarrier_events_num-1; i++) {
+queue->barrier_events[i] = queue->barrier_events[i+1];
+  }
+  queue->barrier_events_num -= 1;
+}
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 40c272c..b79d63a 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -34,10 +34,12 @@ struct _cl_command_queue {
   uint64_t magic;  /* To identify it as a command queue */
   volatile int ref_n;  /* We reference count this object */
   cl_context ctx;  /* Its parent context */
+  cl_event* barrier_events;   /* Point to array of non-complete 
user events that block this command queue */
+  cl_intbarrier_events_num;   /* Number of Non-complete user 
events */
+  cl_intbarrier_events_size;  /* The size of array that 
wait_events point to */
   cl_event* wait_events;   /* Point to array of non-complete user 
events that block this command queue */
   cl_intwait_events_num;   /* Number of Non-complete user events */
   cl_intwait_events_size;  /* The size of array that wait_events 
point to */
-  cl_intbarrier_index; 

[Beignet] [PATCH V1 2/2] add test case runtime_barrier_list and runtime_marker_list.

2014-05-06 Thread xionghu . luo
From: Luo 

---
 utests/CMakeLists.txt   |  2 ++
 utests/runtime_barrier_list.cpp | 75 +
 utests/runtime_marker_list.cpp  | 75 +
 3 files changed, 152 insertions(+)
 create mode 100644 utests/runtime_barrier_list.cpp
 create mode 100644 utests/runtime_marker_list.cpp

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 415dcb6..fcce083 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -153,6 +153,8 @@ set (utests_sources
   runtime_createcontext.cpp
   runtime_null_kernel_arg.cpp
   runtime_event.cpp
+  runtime_barrier_list.cpp
+  runtime_marker_list.cpp
   compiler_double.cpp
   compiler_double_2.cpp
   compiler_double_3.cpp
diff --git a/utests/runtime_barrier_list.cpp b/utests/runtime_barrier_list.cpp
new file mode 100644
index 000..6987d5e
--- /dev/null
+++ b/utests/runtime_barrier_list.cpp
@@ -0,0 +1,75 @@
+#include "utest_helper.hpp"
+
+#define BUFFERSIZE  32*1024
+void runtime_barrier_list(void)
+{
+  const size_t n = BUFFERSIZE;
+  cl_int cpu_src[BUFFERSIZE];
+  cl_int cpu_src_2[BUFFERSIZE];
+  cl_event ev[5];
+  cl_int status = 0;
+  cl_int value = 34;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_event");
+  OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, BUFFERSIZE*sizeof(int), NULL);
+
+  for(cl_uint i=0; i= CL_SUBMITTED);
+  }
+
+
+  buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, 
BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL);
+
+  clEnqueueBarrierWithWaitList(queue, 0, NULL, &ev[3]);
+
+  clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, BUFFERSIZE*sizeof(int), 
(void *)cpu_src_2, 0, NULL, &ev[4]);
+
+  OCL_FINISH();
+  clGetEventInfo(ev[4], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+  OCL_ASSERT(status != CL_COMPLETE);
+
+  OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE);
+
+  clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+  OCL_ASSERT(status == CL_COMPLETE);
+
+  OCL_FINISH();
+
+  for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) {
+clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+OCL_ASSERT(status <= CL_COMPLETE);
+  }
+
+  for (uint32_t i = 0; i < n; ++i) {
+OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3);
+  }
+  clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL);
+
+  for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) {
+clReleaseEvent(ev[i]);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(runtime_barrier_list);
diff --git a/utests/runtime_marker_list.cpp b/utests/runtime_marker_list.cpp
new file mode 100644
index 000..fc77156
--- /dev/null
+++ b/utests/runtime_marker_list.cpp
@@ -0,0 +1,75 @@
+#include "utest_helper.hpp"
+
+#define BUFFERSIZE  32*1024
+void runtime_marker_list(void)
+{
+  const size_t n = BUFFERSIZE;
+  cl_int cpu_src[BUFFERSIZE];
+  cl_int cpu_src_2[BUFFERSIZE];
+  cl_event ev[5];
+  cl_int status = 0;
+  cl_int value = 34;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_event");
+  OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, BUFFERSIZE*sizeof(int), NULL);
+
+  for(cl_uint i=0; i= CL_SUBMITTED);
+  }
+
+
+  buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, 
BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL);
+
+  clEnqueueMarkerWithWaitList(queue, 0, NULL, &ev[3]);
+
+  clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, BUFFERSIZE*sizeof(int), 
(void *)cpu_src_2, 0, NULL, &ev[4]);
+
+  OCL_FINISH();
+  clGetEventInfo(ev[4], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+  OCL_ASSERT(status == CL_COMPLETE);
+
+  OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE);
+
+  clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+  OCL_ASSERT(status == CL_COMPLETE);
+
+  OCL_FINISH();
+
+  for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) {
+clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), 
&status, NULL);
+OCL_ASSERT(status <= CL_COMPLETE);
+  }
+
+  for (uint32_t i = 0; i < n; ++i) {
+OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3);
+  }
+  clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL);
+
+  for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) {
+clReleaseEvent(ev[i]);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(runtime_marker_list);
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V1 1/2] add [opencl-1.2] API clEnqueueBarrierWithWaitList.

2014-05-06 Thread xionghu . luo
From: Luo 

This command blocks command execution; that is, any commands enqueued
after it do not execute until it completes.
The previously pushed clEnqueueMarkerWithWaitList patch was not the latest
version, so it is updated in this patch.
---
 src/cl_api.c   |  19 +-
 src/cl_command_queue.c |  66 +--
 src/cl_command_queue.h |   9 ++-
 src/cl_enqueue.h   |   1 +
 src/cl_event.c | 167 -
 src/cl_event.h |   6 ++
 6 files changed, 202 insertions(+), 66 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 9c22819..ad70b65 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2753,8 +2753,25 @@ clEnqueueBarrier(cl_command_queue  command_queue)
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
-  cl_command_queue_set_barrier(command_queue);
 
+  cl_event_barrier_with_wait_list(command_queue, 0, NULL, NULL);
+
+error:
+  return err;
+}
+
+cl_int
+clEnqueueBarrierWithWaitList(cl_command_queue command_queue,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, command_queue->ctx);
+
+  cl_event_barrier_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
 error:
   return err;
 }
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 6a699c0..c754ad5 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -492,18 +492,74 @@ cl_command_queue_remove_event(cl_command_queue queue, 
cl_event event)
   if(i == queue->wait_events_num)
 return;
 
-  if(queue->barrier_index >= i)
-queue->barrier_index -= 1;
-
   for(; iwait_events_num-1; i++) {
 queue->wait_events[i] = queue->wait_events[i+1];
   }
   queue->wait_events_num -= 1;
 }
 
+#define DEFAULT_WAIT_EVENTS_SIZE  16
 LOCAL void
-cl_command_queue_set_barrier(cl_command_queue queue)
+cl_command_queue_insert_barrier_event(cl_command_queue queue, cl_event event)
 {
-queue->barrier_index = queue->wait_events_num;
+  cl_int i=0;
+  cl_event *new_list;
+
+  assert(queue != NULL);
+  if(queue->barrier_events == NULL) {
+queue->barrier_events_size = DEFAULT_WAIT_EVENTS_SIZE;
+TRY_ALLOC_NO_ERR (queue->barrier_events, CALLOC_ARRAY(cl_event, 
queue->barrier_events_size));
+  }
+
+  for(i=0; ibarrier_events_num; i++) {
+if(queue->barrier_events[i] == event)
+  return;   //is in the barrier_events, need to insert
+  }
+
+  if(queue->barrier_events_num < queue->barrier_events_size) {
+queue->barrier_events[queue->barrier_events_num++] = event;
+return;
+  }
+
+  //barrier_events_num == barrier_events_size, array is full
+  queue->barrier_events_size *= 2;
+  TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, 
queue->barrier_events_size));
+  memcpy(new_list, queue->barrier_events, 
sizeof(cl_event)*queue->barrier_events_num);
+  cl_free(queue->barrier_events);
+  queue->barrier_events = new_list;
+  queue->barrier_events[queue->barrier_events_num++] = event;
+  return;
+
+exit:
+  return;
+error:
+  if(queue->barrier_events)
+cl_free(queue->barrier_events);
+  queue->barrier_events = NULL;
+  queue->barrier_events_size = 0;
+  queue->barrier_events_num = 0;
+  goto exit;
+
 }
 
+LOCAL void
+cl_command_queue_remove_barrier_event(cl_command_queue queue, cl_event event)
+{
+  cl_int i=0;
+
+  if(queue->barrier_events_num == 0)
+return;
+
+  for(i=0; ibarrier_events_num; i++) {
+if(queue->barrier_events[i] == event)
+  break;
+  }
+
+  if(i == queue->barrier_events_num)
+return;
+
+  for(; ibarrier_events_num-1; i++) {
+queue->barrier_events[i] = queue->barrier_events[i+1];
+  }
+  queue->barrier_events_num -= 1;
+}
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 40c272c..b79d63a 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -34,10 +34,12 @@ struct _cl_command_queue {
   uint64_t magic;  /* To identify it as a command queue */
   volatile int ref_n;  /* We reference count this object */
   cl_context ctx;  /* Its parent context */
+  cl_event* barrier_events;   /* Point to array of non-complete 
user events that block this command queue */
+  cl_intbarrier_events_num;   /* Number of Non-complete user 
events */
+  cl_intbarrier_events_size;  /* The size of array that 
wait_events point to */
   cl_event* wait_events;   /* Point to array of non-complete user 
events that block this command queue */
   cl_intwait_events_num;   /* Number of Non-complete user events */
   cl_intwait_events_size;  /* The size of array that wait_events 
point to */
-  cl_intbarrier_index; /* Indicate event count in wait_events 
as barrier events */
   cl_event  last_event;/* The last event in the queue, for 
enqueue mark

[Beignet] [PATCH 1/3] [opencl-1.2] move enqueue_copy_image kernels outside of runtime code.

2014-05-08 Thread xionghu . luo
From: Luo 

Separate the kernel code from the host code to keep it clean;
build the kernels offline with gbe_bin_generator to improve
performance.
---
 src/CMakeLists.txt |  25 ++-
 src/cl_context.h   |  16 +-
 src/cl_gt_device.h |  23 ++-
 src/cl_mem.c   | 214 ++---
 src/kernels/cl_internal_copy_buf_align1.cl |   8 -
 src/kernels/cl_internal_copy_buf_align16.cl|   2 +-
 src/kernels/cl_internal_copy_buf_align4.cl |   2 +-
 src/kernels/cl_internal_copy_buf_rect.cl   |  15 ++
 .../cl_internal_copy_buf_unalign_dst_offset.cl |   2 +-
 .../cl_internal_copy_buf_unalign_same_offset.cl|   2 +-
 .../cl_internal_copy_buf_unalign_src_offset.cl |   2 +-
 src/kernels/cl_internal_copy_buffer_to_image_2d.cl |  18 ++
 src/kernels/cl_internal_copy_buffer_to_image_3d.cl |  19 ++
 src/kernels/cl_internal_copy_image_2d_to_2d.cl |  21 ++
 src/kernels/cl_internal_copy_image_2d_to_3d.cl |  22 +++
 src/kernels/cl_internal_copy_image_2d_to_buffer.cl |  19 ++
 src/kernels/cl_internal_copy_image_3d_to_2d.cl |  22 +++
 src/kernels/cl_internal_copy_image_3d_to_3d.cl |  23 +++
 src/kernels/cl_internal_copy_image_3d_to_buffer.cl |  22 +++
 19 files changed, 301 insertions(+), 176 deletions(-)
 delete mode 100644 src/kernels/cl_internal_copy_buf_align1.cl
 create mode 100644 src/kernels/cl_internal_copy_buf_rect.cl
 create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_2d.cl
 create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_3d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_2d_to_3d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_2d_to_buffer.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_3d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_buffer.cl

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a3bac02..da7e1eb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -17,14 +17,33 @@ foreach (KF ${KERNEL_FILES})
 endforeach (KF)
 endmacro (MakeKernelBinStr)
 
+macro (MakeBuiltInKernelStr KERNEL_PATH KERNEL_FILES)
+  set (output_file ${KERNEL_PATH}/${BUILT_IN_NAME}.cl)
+  set (file_content)
+  file (REMOVE ${output_file})
+  foreach (KF ${KERNEL_NAMES})
+set (input_file ${KERNEL_PATH}/${KF}.cl)
+file(READ ${input_file} file_content )
+STRING(REGEX REPLACE ";" ";" file_content "${file_content}")
+file(APPEND ${output_file} ${file_content})
+  endforeach (KF)
+endmacro (MakeBuiltInKernelStr)
+
 set (KERNEL_STR_FILES)
-set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4
+set (KERNEL_NAMES cl_internal_copy_buf_align4
 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset
 cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
-cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
-cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign
+cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d 
cl_internal_copy_image_3d_to_2d
+cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
+cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
+cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d
+cl_internal_fill_buf_unalign cl_internal_fill_buf_align2
+cl_internal_fill_buf_align4 cl_internal_fill_buf_align8
 cl_internal_fill_buf_align128)
+set (BUILT_IN_NAME  cl_internal_built_in_kernel)
+MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
 MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
+MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${BUILT_IN_NAME}")
 
 set(OPENCL_SRC
 ${KERNEL_STR_FILES}
diff --git a/src/cl_context.h b/src/cl_context.h
index b2562ce..65b1728 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -47,14 +47,14 @@ enum _cl_internal_ker_type {
   CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
   CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
   CL_ENQUEUE_COPY_BUFFER_RECT,
-  CL_ENQUEUE_COPY_IMAGE_0, //copy image 2d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_1, //copy image 3d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_2, //copy image 2d to image 3d
-  CL_ENQUEUE_COPY_IMAGE_3, //copy image 3d to image 3d
-  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0,   //copy image 2d to buffer
-  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1,   //copy image 3d tobuffer
-  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0,   //copy buffer to image 2d
-  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1,   //copy buffer to image 3d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to imag

[Beignet] [PATCH 2/3] add [opencl-1.2] API clCreateProgramWithBuiltinKernel.

2014-05-08 Thread xionghu . luo
From: Luo 

This API creates a built-in program object for a context, and loads
the built-in kernels into this program object.
---
 backend/src/ir/image.cpp |  5 
 src/cl_api.c | 24 
 src/cl_context.c |  8 ++
 src/cl_context.h |  2 ++
 src/cl_program.c | 74 
 src/cl_program.h |  7 +
 6 files changed, 120 insertions(+)

diff --git a/backend/src/ir/image.cpp b/backend/src/ir/image.cpp
index 8c34d70..87bafc0 100644
--- a/backend/src/ir/image.cpp
+++ b/backend/src/ir/image.cpp
@@ -125,7 +125,12 @@ namespace ir {
 
   void ImageSet::getData(struct ImageInfo *imageInfos) const {
   for(auto &it : regMap)
+  {
+int t = it.second->idx - gbe_get_image_base_index();
+if(t < 0)
+  continue;
 imageInfos[it.second->idx - gbe_get_image_base_index()] = *it.second;
+  }
   }
 
   ImageSet::~ImageSet() {
diff --git a/src/cl_api.c b/src/cl_api.c
index 9c22819..0a1c4ab 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -816,6 +816,30 @@ error:
 *errcode_ret = err;
   return program;
 }
+
+cl_program
+clCreateProgramWithBuiltInKernels(cl_context   context,
+  cl_uint  num_devices,
+  const cl_device_id * device_list,
+  const char * kernel_names,
+  cl_int * errcode_ret)
+{
+  cl_program program = NULL;
+  cl_int err = CL_SUCCESS;
+
+  CHECK_CONTEXT (context);
+  INVALID_VALUE_IF (kernel_names == NULL);
+  program = cl_program_create_with_built_in_kernles(context,
+num_devices,
+device_list,
+kernel_names,
+&err);
+error:
+  if (errcode_ret)
+*errcode_ret = err;
+  return program;
+}
+
 cl_int
 clRetainProgram(cl_program program)
 {
diff --git a/src/cl_context.c b/src/cl_context.c
index 293af94..6172ecc 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -206,8 +206,16 @@ cl_context_delete(cl_context ctx)
   cl_program_delete(ctx->internal_prgs[i]);
   ctx->internal_prgs[i] = NULL;
 }
+
+if (ctx->internel_kernels[i]) {
+  cl_kernel_delete(ctx->built_in_kernels[i]);
+  ctx->built_in_kernels[i] = NULL;
+}
   }
 
+  cl_program_delete(ctx->built_in_prgs);
+  ctx->built_in_prgs = NULL;
+
   /* All object lists should have been freed. Otherwise, the reference counter
* of the context cannot be 0
*/
diff --git a/src/cl_context.h b/src/cl_context.h
index 65b1728..cba0a0a 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -102,6 +102,8 @@ struct _cl_context {
 /* All programs internal used, for example 
clEnqueuexxx api use */
   cl_kernel  internel_kernels[CL_INTERNAL_KERNEL_MAX];
 /* All kernels  for clenqueuexxx api, for 
example clEnqueuexxx api use */
+  cl_program built_in_prgs;  /*all built-in kernels belongs to this program 
only*/
+  cl_kernel  built_in_kernels[CL_INTERNAL_KERNEL_MAX];
   uint32_t ver; /* Gen version */
   struct _cl_context_prop props;
   cl_context_properties * prop_user; /* a copy of user passed context 
properties when create context */
diff --git a/src/cl_program.c b/src/cl_program.c
index 184d6b5..87a1e6b 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -209,6 +209,80 @@ error:
 }
 
 LOCAL cl_program
+cl_program_create_with_built_in_kernles(cl_context ctx,
+  cl_uint  num_devices,
+  const cl_device_id * devices,
+  const char * kernel_names,
+  cl_int * errcode_ret)
+{
+  cl_int err = CL_SUCCESS;
+
+  assert(ctx);
+  INVALID_DEVICE_IF (num_devices != 1);
+  INVALID_DEVICE_IF (devices == NULL);
+  INVALID_DEVICE_IF (devices[0] != ctx->device);
+
+  extern char cl_internal_built_in_kernel_str[];
+  extern int cl_internal_built_in_kernel_str_size;
+  char* p_built_in_kernel_str =cl_internal_built_in_kernel_str;
+  cl_int binary_status = CL_SUCCESS;
+
+  ctx->built_in_prgs = cl_program_create_from_binary(ctx, 1,
+  &ctx->device,
+  
(size_t*)&cl_internal_built_in_kernel_str_size,
+  (const unsigned char 
**)&p_built_in_kernel_str,
+  &binary_status, 
&err);
+
+  if (!ctx->built_in_prgs)
+return NULL;
+
+  err = cl_program_build(ctx->built_in_prgs, NULL);
+  if (err != CL_SUCCESS)
+return NULL;
+
+  ctx->built_in_prgs->is_built = 1;
+
+  char 

[Beignet] [PATCH 3/3] [opencl-1.2] add test case for API clCreateProgramWithBuiltInKernels.

2014-05-08 Thread xionghu . luo
From: Luo 

---
 utests/CMakeLists.txt   |  1 +
 utests/enqueue_built_in_kernels.cpp | 20 
 2 files changed, 21 insertions(+)
 create mode 100644 utests/enqueue_built_in_kernels.cpp

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 415dcb6..90585d9 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -176,6 +176,7 @@ set (utests_sources
   enqueue_copy_buf.cpp
   enqueue_copy_buf_unaligned.cpp
   enqueue_fill_buf.cpp
+  enqueue_built_in_kernels.cpp
   utest_assert.cpp
   utest.cpp
   utest_file_map.cpp
diff --git a/utests/enqueue_built_in_kernels.cpp 
b/utests/enqueue_built_in_kernels.cpp
new file mode 100644
index 000..8b47bca
--- /dev/null
+++ b/utests/enqueue_built_in_kernels.cpp
@@ -0,0 +1,20 @@
+#include "utest_helper.hpp"
+
+void enqueue_built_in_kernels(void)
+{
+  char* built_in_kernel_names;
+  size_t built_in_kernels_size;
+  cl_int err = CL_SUCCESS;
+  size_t ret_sz;
+
+
+  OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, 
&built_in_kernels_size);
+  built_in_kernel_names = (char* )malloc(built_in_kernels_size * sizeof(char) 
);
+  OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 
built_in_kernels_size, (void*)built_in_kernel_names, &ret_sz);
+  OCL_ASSERT(ret_sz == built_in_kernels_size);
+  cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, 
&device, built_in_kernel_names, &err);
+  OCL_ASSERT(built_in_prog != NULL);
+
+}
+
+MAKE_UTEST_FROM_FUNCTION(enqueue_built_in_kernels);
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/2] fix event related bugs.

2014-05-12 Thread xionghu . luo
From: Luo 

1. remove repeated user events in the list.
2. add missing braces in loops.
3. fix barrier event reference not being increased.
---
 src/cl_alloc.c |   1 +
 src/cl_event.c | 111 -
 src/cl_event.h |   4 +++
 3 files changed, 75 insertions(+), 41 deletions(-)

diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index 20d5578..93d2e6a 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -71,6 +71,7 @@ cl_free(void *ptr)
 return;
   atomic_dec(&cl_alloc_n);
   free(ptr);
+  ptr = NULL;
 }
 
 LOCAL size_t
diff --git a/src/cl_event.c b/src/cl_event.c
index 727ee1f..101e735 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -231,6 +231,9 @@ cl_int cl_event_wait_events(cl_uint 
num_events_in_wait_list, const cl_event *eve
   }
 
   if(queue && queue->barrier_index > 0) {
+for(j=0; jwait_events_num; j++){
+  cl_event_add_ref(queue->wait_events[j]);  //add defer enqueue's wait 
event reference
+  }
 return CL_ENQUEUE_EXECUTE_DEFER;
   }
 
@@ -258,9 +261,10 @@ void cl_event_new_enqueue_callback(cl_event event,
   user_event *user_events, *u_ev;
   cl_command_queue queue = event->queue;
   cl_int i;
+  cl_int err = CL_SUCCESS;
   GET_QUEUE_THREAD_GPGPU(data->queue);
 
-  /* Allocate and inialize the structure itself */
+  /* Allocate and initialize the structure itself */
   TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
   cb->num_events = num_events_in_wait_list;
   TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, 
num_events_in_wait_list));
@@ -276,22 +280,20 @@ void cl_event_new_enqueue_callback(cl_event event,
   node = queue->wait_events[i]->waits_head;
   if(node == NULL)
 queue->wait_events[i]->waits_head = cb;
-  else
+  else{
 while((node != cb) && node->next)
   node = node->next;
 if(node == cb)   //wait on dup user event
   continue;
 node->next = cb;
+  }
 
   /* Insert the user event to enqueue_callback's wait_user_events */
-  TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
-  u_ev->event = queue->wait_events[i];
-  u_ev->next = cb->wait_user_events;
-  cb->wait_user_events = u_ev;
+  TRY(cl_event_insert_user_event, &cb->wait_user_events, 
queue->wait_events[i]);
 }
   }
 
-  /* Find out all user events that events in event_wait_list wait */
+  /* Find out all user events that in event_wait_list wait */
   for(i=0; istatus <= CL_COMPLETE)
   continue;
@@ -309,31 +311,29 @@ void cl_event_new_enqueue_callback(cl_event event,
 node->next = cb;
   }
   /* Insert the user event to enqueue_callback's wait_user_events */
-  TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
-  u_ev->event = event_wait_list[i];
-  u_ev->next = cb->wait_user_events;
-  cb->wait_user_events = u_ev;
+  TRY(cl_event_insert_user_event, &cb->wait_user_events, 
event_wait_list[i]);
   cl_command_queue_insert_event(event->queue, event_wait_list[i]);
 } else if(event_wait_list[i]->enqueue_cb != NULL) {
   user_events = event_wait_list[i]->enqueue_cb->wait_user_events;
   while(user_events != NULL) {
 /* Insert the enqueue_callback to user event's  waits_tail */
 node = user_events->event->waits_head;
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb) {  //wait on dup user event
-  user_events = user_events->next;
-  continue;
+if(node == NULL)
+  event_wait_list[i]->waits_head = cb;
+else{
+  while((node != cb) && node->next)
+node = node->next;
+  if(node == cb) {  //wait on dup user event
+user_events = user_events->next;
+continue;
+  }
+  node->next = cb;
 }
-node->next = cb;
 
 /* Insert the user event to enqueue_callback's wait_user_events */
-TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
-u_ev->event = user_events->event;
-u_ev->next = cb->wait_user_events;
-cb->wait_user_events = u_ev;
+TRY(cl_event_insert_user_event, &cb->wait_user_events, 
user_events->event);
+cl_command_queue_insert_event(event->queue, user_events->event);
 user_events = user_events->next;
-cl_command_queue_insert_event(event->queue, event_wait_list[i]);
   }
 }
   }
@@ -363,7 +363,6 @@ error:
 void cl_event_set_status(cl_event event, cl_int status)
 {
   user_callback *user_cb;
-  user_event*u_ev, *u_ev_next;
   cl_int ret, i;
   cl_event evt;
 
@@ -419,23 +418,8 @@ void cl_event_set_status(cl_event event, cl_int status)
   /* Check all defer enqueue */
   enqueue_callback *cb, *enqueue_cb = event->waits_head;
   while(enqueue_cb) {
-/* Remove this user event in enqueue_cb */
-while(enqueue_cb->wait_user_events &&
-  enqueue_cb->wait_user_events->event == event) {
-  u_ev = enqueue_cb->wait_user_events;
-  enqueue_cb->wait_user_events = enqueue_cb->wait_user_e

[Beignet] [PATCH 2/2] move enqueue_copy_image kernels outside of runtime code.

2014-05-12 Thread xionghu . luo
From: Luo 

Separate the kernel code from the host code to keep it clean; build the
kernels offline with gbe_bin_generator to improve performance.
---
 src/CMakeLists.txt |  23 ++-
 src/cl_context.h   |  24 ++-
 src/cl_gt_device.h |  23 ++-
 src/cl_mem.c   | 214 ++---
 src/kernels/cl_internal_copy_buf_align1.cl |   8 -
 src/kernels/cl_internal_copy_buf_align16.cl|   2 +-
 src/kernels/cl_internal_copy_buf_align4.cl |   2 +-
 src/kernels/cl_internal_copy_buf_rect.cl   |  15 ++
 .../cl_internal_copy_buf_unalign_dst_offset.cl |   2 +-
 .../cl_internal_copy_buf_unalign_same_offset.cl|   2 +-
 .../cl_internal_copy_buf_unalign_src_offset.cl |   2 +-
 src/kernels/cl_internal_copy_buffer_to_image_2d.cl |  18 ++
 src/kernels/cl_internal_copy_buffer_to_image_3d.cl |  19 ++
 src/kernels/cl_internal_copy_image_2d_to_2d.cl |  21 ++
 src/kernels/cl_internal_copy_image_2d_to_3d.cl |  22 +++
 src/kernels/cl_internal_copy_image_2d_to_buffer.cl |  19 ++
 src/kernels/cl_internal_copy_image_3d_to_2d.cl |  22 +++
 src/kernels/cl_internal_copy_image_3d_to_3d.cl |  23 +++
 src/kernels/cl_internal_copy_image_3d_to_buffer.cl |  22 +++
 19 files changed, 308 insertions(+), 175 deletions(-)
 delete mode 100644 src/kernels/cl_internal_copy_buf_align1.cl
 create mode 100644 src/kernels/cl_internal_copy_buf_rect.cl
 create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_2d.cl
 create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_3d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_2d_to_3d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_2d_to_buffer.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_3d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_buffer.cl

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8164a44..ecc04ab 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -17,11 +17,30 @@ foreach (KF ${KERNEL_FILES})
 endforeach (KF)
 endmacro (MakeKernelBinStr)
 
+macro (MakeBuiltInKernelStr KERNEL_PATH KERNEL_FILES)
+  set (output_file ${KERNEL_PATH}/${BUILT_IN_NAME}.cl)
+  set (file_content)
+  file (REMOVE ${output_file})
+  foreach (KF ${KERNEL_NAMES})
+set (input_file ${KERNEL_PATH}/${KF}.cl)
+file(READ ${input_file} file_content )
+STRING(REGEX REPLACE ";" ";" file_content "${file_content}")
+file(APPEND ${output_file} ${file_content})
+  endforeach (KF)
+endmacro (MakeBuiltInKernelStr)
+
 set (KERNEL_STR_FILES)
-set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4
+set (KERNEL_NAMES cl_internal_copy_buf_align4
 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset
-cl_internal_copy_buf_unalign_dst_offset 
cl_internal_copy_buf_unalign_src_offset)
+cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
+cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d 
cl_internal_copy_image_3d_to_2d
+cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
+cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
+cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d)
+set (BUILT_IN_NAME  cl_internal_built_in_kernel)
+MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
 MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
+MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${BUILT_IN_NAME}")
 
 set(OPENCL_SRC
 ${KERNEL_STR_FILES}
diff --git a/src/cl_context.h b/src/cl_context.h
index 782a9af..24281be 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -46,14 +46,22 @@ enum _cl_internal_ker_type {
   CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
   CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
   CL_ENQUEUE_COPY_BUFFER_RECT,
-  CL_ENQUEUE_COPY_IMAGE_0, //copy image 2d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_1, //copy image 3d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_2, //copy image 2d to image 3d
-  CL_ENQUEUE_COPY_IMAGE_3, //copy image 3d to image 3d
-  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0,   //copy image 2d to buffer
-  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1,   //copy image 3d tobuffer
-  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0,   //copy buffer to image 2d
-  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1,   //copy buffer to image 3d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,   //copy image 2d to buffer
+  CL_ENQUEUE_COPY_IMAGE_3D_TO

[Beignet] [PATCH V1 1/2] fix event related bugs.

2014-05-12 Thread xionghu . luo
From: Luo 

1. remove repeated user events in the list.
2. add missing braces in loops.
3. fix barrier event reference not being increased.
---
 src/cl_alloc.c |   1 +
 src/cl_event.c | 111 -
 src/cl_event.h |   4 +++
 3 files changed, 75 insertions(+), 41 deletions(-)

diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index 20d5578..93d2e6a 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -71,6 +71,7 @@ cl_free(void *ptr)
 return;
   atomic_dec(&cl_alloc_n);
   free(ptr);
+  ptr = NULL;
 }
 
 LOCAL size_t
diff --git a/src/cl_event.c b/src/cl_event.c
index 727ee1f..101e735 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -231,6 +231,9 @@ cl_int cl_event_wait_events(cl_uint 
num_events_in_wait_list, const cl_event *eve
   }
 
   if(queue && queue->barrier_index > 0) {
+for(j=0; jwait_events_num; j++){
+  cl_event_add_ref(queue->wait_events[j]);  //add defer enqueue's wait 
event reference
+  }
 return CL_ENQUEUE_EXECUTE_DEFER;
   }
 
@@ -258,9 +261,10 @@ void cl_event_new_enqueue_callback(cl_event event,
   user_event *user_events, *u_ev;
   cl_command_queue queue = event->queue;
   cl_int i;
+  cl_int err = CL_SUCCESS;
   GET_QUEUE_THREAD_GPGPU(data->queue);
 
-  /* Allocate and inialize the structure itself */
+  /* Allocate and initialize the structure itself */
   TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
   cb->num_events = num_events_in_wait_list;
   TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, 
num_events_in_wait_list));
@@ -276,22 +280,20 @@ void cl_event_new_enqueue_callback(cl_event event,
   node = queue->wait_events[i]->waits_head;
   if(node == NULL)
 queue->wait_events[i]->waits_head = cb;
-  else
+  else{
 while((node != cb) && node->next)
   node = node->next;
 if(node == cb)   //wait on dup user event
   continue;
 node->next = cb;
+  }
 
   /* Insert the user event to enqueue_callback's wait_user_events */
-  TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
-  u_ev->event = queue->wait_events[i];
-  u_ev->next = cb->wait_user_events;
-  cb->wait_user_events = u_ev;
+  TRY(cl_event_insert_user_event, &cb->wait_user_events, 
queue->wait_events[i]);
 }
   }
 
-  /* Find out all user events that events in event_wait_list wait */
+  /* Find out all user events that in event_wait_list wait */
   for(i=0; istatus <= CL_COMPLETE)
   continue;
@@ -309,31 +311,29 @@ void cl_event_new_enqueue_callback(cl_event event,
 node->next = cb;
   }
   /* Insert the user event to enqueue_callback's wait_user_events */
-  TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
-  u_ev->event = event_wait_list[i];
-  u_ev->next = cb->wait_user_events;
-  cb->wait_user_events = u_ev;
+  TRY(cl_event_insert_user_event, &cb->wait_user_events, 
event_wait_list[i]);
   cl_command_queue_insert_event(event->queue, event_wait_list[i]);
 } else if(event_wait_list[i]->enqueue_cb != NULL) {
   user_events = event_wait_list[i]->enqueue_cb->wait_user_events;
   while(user_events != NULL) {
 /* Insert the enqueue_callback to user event's  waits_tail */
 node = user_events->event->waits_head;
-while((node != cb) && node->next)
-  node = node->next;
-if(node == cb) {  //wait on dup user event
-  user_events = user_events->next;
-  continue;
+if(node == NULL)
+  event_wait_list[i]->waits_head = cb;
+else{
+  while((node != cb) && node->next)
+node = node->next;
+  if(node == cb) {  //wait on dup user event
+user_events = user_events->next;
+continue;
+  }
+  node->next = cb;
 }
-node->next = cb;
 
 /* Insert the user event to enqueue_callback's wait_user_events */
-TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
-u_ev->event = user_events->event;
-u_ev->next = cb->wait_user_events;
-cb->wait_user_events = u_ev;
+TRY(cl_event_insert_user_event, &cb->wait_user_events, 
user_events->event);
+cl_command_queue_insert_event(event->queue, user_events->event);
 user_events = user_events->next;
-cl_command_queue_insert_event(event->queue, event_wait_list[i]);
   }
 }
   }
@@ -363,7 +363,6 @@ error:
 void cl_event_set_status(cl_event event, cl_int status)
 {
   user_callback *user_cb;
-  user_event*u_ev, *u_ev_next;
   cl_int ret, i;
   cl_event evt;
 
@@ -419,23 +418,8 @@ void cl_event_set_status(cl_event event, cl_int status)
   /* Check all defer enqueue */
   enqueue_callback *cb, *enqueue_cb = event->waits_head;
   while(enqueue_cb) {
-/* Remove this user event in enqueue_cb */
-while(enqueue_cb->wait_user_events &&
-  enqueue_cb->wait_user_events->event == event) {
-  u_ev = enqueue_cb->wait_user_events;
-  enqueue_cb->wait_user_events = enqueue_cb->wait_user_e

[Beignet] [PATCH V1 2/2] move enqueue_copy_image kernels outside of runtime code.

2014-05-12 Thread xionghu . luo
From: Luo 

Separate the kernel code from the host code to keep it clean; build the
kernels offline with gbe_bin_generator to improve performance.
---
 src/CMakeLists.txt |  23 ++-
 src/cl_context.h   |  16 +-
 src/cl_mem.c   | 214 ++---
 src/kernels/cl_internal_copy_buf_align1.cl |   8 -
 src/kernels/cl_internal_copy_buf_align16.cl|   2 +-
 src/kernels/cl_internal_copy_buf_align4.cl |   2 +-
 src/kernels/cl_internal_copy_buf_rect.cl   |  15 ++
 .../cl_internal_copy_buf_unalign_dst_offset.cl |   2 +-
 .../cl_internal_copy_buf_unalign_same_offset.cl|   2 +-
 .../cl_internal_copy_buf_unalign_src_offset.cl |   2 +-
 src/kernels/cl_internal_copy_buffer_to_image_2d.cl |  18 ++
 src/kernels/cl_internal_copy_buffer_to_image_3d.cl |  19 ++
 src/kernels/cl_internal_copy_image_2d_to_2d.cl |  21 ++
 src/kernels/cl_internal_copy_image_2d_to_3d.cl |  22 +++
 src/kernels/cl_internal_copy_image_2d_to_buffer.cl |  19 ++
 src/kernels/cl_internal_copy_image_3d_to_2d.cl |  22 +++
 src/kernels/cl_internal_copy_image_3d_to_3d.cl |  23 +++
 src/kernels/cl_internal_copy_image_3d_to_buffer.cl |  22 +++
 18 files changed, 278 insertions(+), 174 deletions(-)
 delete mode 100644 src/kernels/cl_internal_copy_buf_align1.cl
 create mode 100644 src/kernels/cl_internal_copy_buf_rect.cl
 create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_2d.cl
 create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_3d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_2d_to_3d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_2d_to_buffer.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_3d.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_buffer.cl

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8164a44..ecc04ab 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -17,11 +17,30 @@ foreach (KF ${KERNEL_FILES})
 endforeach (KF)
 endmacro (MakeKernelBinStr)
 
+macro (MakeBuiltInKernelStr KERNEL_PATH KERNEL_FILES)
+  set (output_file ${KERNEL_PATH}/${BUILT_IN_NAME}.cl)
+  set (file_content)
+  file (REMOVE ${output_file})
+  foreach (KF ${KERNEL_NAMES})
+set (input_file ${KERNEL_PATH}/${KF}.cl)
+file(READ ${input_file} file_content )
+STRING(REGEX REPLACE ";" ";" file_content "${file_content}")
+file(APPEND ${output_file} ${file_content})
+  endforeach (KF)
+endmacro (MakeBuiltInKernelStr)
+
 set (KERNEL_STR_FILES)
-set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4
+set (KERNEL_NAMES cl_internal_copy_buf_align4
 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset
-cl_internal_copy_buf_unalign_dst_offset 
cl_internal_copy_buf_unalign_src_offset)
+cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
+cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d 
cl_internal_copy_image_3d_to_2d
+cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
+cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
+cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d)
+set (BUILT_IN_NAME  cl_internal_built_in_kernel)
+MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
 MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
+MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${BUILT_IN_NAME}")
 
 set(OPENCL_SRC
 ${KERNEL_STR_FILES}
diff --git a/src/cl_context.h b/src/cl_context.h
index 782a9af..82d3217 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -46,14 +46,14 @@ enum _cl_internal_ker_type {
   CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
   CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
   CL_ENQUEUE_COPY_BUFFER_RECT,
-  CL_ENQUEUE_COPY_IMAGE_0, //copy image 2d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_1, //copy image 3d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_2, //copy image 2d to image 3d
-  CL_ENQUEUE_COPY_IMAGE_3, //copy image 3d to image 3d
-  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0,   //copy image 2d to buffer
-  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1,   //copy image 3d tobuffer
-  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0,   //copy buffer to image 2d
-  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1,   //copy buffer to image 3d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,   //copy image 2d to buffer
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,   //copy image 3d tobuffer
+  CL_ENQUEUE_COPY_BUFFER_T

[Beignet] [PATCH] add [opencl-1.2] API clCreateSubDevice.

2014-05-12 Thread xionghu . luo
From: Luo 

creates an array of sub-devices that each reference a non-intersecting
set of compute units within in_device, according to a partition scheme
given by properties.
---
 src/cl_api.c   | 10 --
 src/cl_device_id.c |  6 ++
 src/cl_device_id.h |  7 +++
 src/cl_gt_device.h |  7 ++-
 4 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 9c22819..2077d02 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -242,8 +242,14 @@ clCreateSubDevices(cl_device_id 
in_device,
cl_device_id *   out_devices,
cl_uint *num_devices_ret)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  /* Check parameter consistency */
+  if (UNLIKELY(out_devices == NULL && num_devices_ret == NULL))
+return CL_INVALID_VALUE;
+  if (UNLIKELY(in_device == NULL && properties == NULL))
+return CL_INVALID_VALUE;
+
+  *num_devices_ret = 0;
+  return CL_SUCCESS;
 }
 
 cl_int
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 2b443c6..37f49be 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -346,6 +346,12 @@ cl_get_device_info(cl_device_id device,
 DECL_STRING_FIELD(OPENCL_C_VERSION, opencl_c_version)
 DECL_STRING_FIELD(EXTENSIONS, extensions);
 DECL_STRING_FIELD(BUILT_IN_KERNELS, built_in_kernels)
+DECL_FIELD(PARENT_DEVICE, parent_device)
+DECL_FIELD(PARTITION_MAX_SUB_DEVICES, partition_max_sub_device)
+DECL_FIELD(PARTITION_PROPERTIES, partition_property)
+DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain)
+DECL_FIELD(PARTITION_TYPE, partition_type)
+DECL_FIELD(REFERENCE_COUNT, device_reference_count)
 
 case CL_DRIVER_VERSION:
   if (param_value_size_ret) {
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 5f7c9fe..6f8d25f 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -97,6 +97,13 @@ struct _cl_device_id {
   /* Kernel specific info that we're assigning statically */
   size_t wg_sz;
   size_t preferred_wg_sz_mul;
+  /* SubDevice specific info */
+  cl_device_id parent_device;
+  cl_uint  partition_max_sub_device;
+  cl_device_partition_property partition_property[3];
+  cl_device_affinity_domainaffinity_domain;
+  cl_device_partition_property partition_type[3];
+  cl_uint  device_reference_count;
 };
 
 /* Get a device from the given platform */
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index 110988a..88decd7 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -78,5 +78,10 @@ DECL_INFO_STRING(extensions, "")
 DECL_INFO_STRING(built_in_kernels, "")
 DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
 #undef DECL_INFO_STRING
-
+.parent_device = NULL,
+.partition_max_sub_device = 1,
+.partition_property = {0},
+.affinity_domain = 0,
+.partition_type = {0},
+.device_reference_count = 1,
 
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] remove the code of saving the llvm bitcode to file, replace it with llvm::Module pointer.

2014-05-14 Thread xionghu . luo
From: Luo 

  Save the Act and module pointers in GenProgram and delete them in the
  destructor.
---
 backend/src/backend/gen_program.cpp | 31 +---
 backend/src/backend/gen_program.hpp |  4 +++-
 backend/src/backend/program.cpp | 47 +
 backend/src/backend/program.h   |  3 ++-
 backend/src/backend/program.hpp |  2 +-
 backend/src/llvm/llvm_to_gen.cpp| 12 ++
 backend/src/llvm/llvm_to_gen.hpp|  2 +-
 src/cl_program.c|  2 +-
 8 files changed, 55 insertions(+), 48 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 52db904..a311c71 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -22,6 +22,17 @@
  * \author Benjamin Segovia 
  */
 
+#include "llvm/Config/config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/DataLayout.h"
+#else
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/DataLayout.h"
+#endif  /* LLVM_VERSION_MINOR <= 2 */
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -33,6 +44,8 @@
 #include "ir/unit.hpp"
 #include "llvm/llvm_to_gen.hpp"
 
+#include 
+
 #include 
 #include 
 #include 
@@ -72,7 +85,17 @@ namespace gbe {
 fclose(f);
   }
 
-  GenProgram::~GenProgram(void) {}
+  GenProgram::~GenProgram(void){
+if(module){
+  delete (llvm::Module*)module;
+  module = NULL;
+}
+
+if(Act){
+  delete (clang::CodeGenAction*)Act;
+  Act = NULL;
+}
+  }
 
   /*! We must avoid spilling at all cost with Gen */
   static const struct CodeGenStrategy {
@@ -177,16 +200,18 @@ namespace gbe {
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
const char *fileName,
+   const void* module,
+   const void* act,
size_t stringSize,
char *err,
size_t *errSize,
int optLevel)
   {
 using namespace gbe;
-GenProgram *program = GBE_NEW(GenProgram, deviceID);
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, act);
 std::string error;
 // Try to compile the program
-if (program->buildFromLLVMFile(fileName, error, optLevel) == false) {
+if (program->buildFromLLVMFile(fileName, module, error, optLevel) == 
false) {
   if (err != NULL && errSize != NULL && stringSize > 0u) {
 const size_t msgSize = std::min(error.size(), stringSize-1u);
 std::memcpy(err, error.c_str(), msgSize);
diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ea54b49..b17dfc8 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -58,7 +58,7 @@ namespace gbe
   {
   public:
 /*! Create an empty program */
-GenProgram(uint32_t deviceID) : deviceID(deviceID) {}
+GenProgram(uint32_t deviceID, const void* mod = NULL, const void* act = 
NULL) : deviceID(deviceID),module((void*)mod), Act((void*)act) {}
 /*! Current device ID*/
 uint32_t deviceID;
 /*! Destroy the program */
@@ -69,6 +69,8 @@ namespace gbe
 virtual Kernel *allocateKernel(const std::string &name) {
   return GBE_NEW(GenKernel, name);
 }
+void* module;
+void* Act;
 /*! Use custom allocators */
 GBE_CLASS(GenProgram);
   };
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index bdc7d34..6745d70 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Config/config.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include 
 #include 
 #include 
@@ -102,9 +103,9 @@ namespace gbe {
 
   BVAR(OCL_OUTPUT_GEN_IR, false);
 
-  bool Program::buildFromLLVMFile(const char *fileName, std::string &error, 
int optLevel) {
+  bool Program::buildFromLLVMFile(const char *fileName, const void* module, 
std::string &error, int optLevel) {
 ir::Unit *unit = new ir::Unit();
-if (llvmToGen(*unit, fileName, optLevel) == false) {
+if (llvmToGen(*unit, fileName, module, optLevel) == false) {
   error = std::string(fileName) + " not found";
   return false;
 }
@@ -113,7 +114,7 @@ namespace gbe {
 if(!unit->getValid()) {
   delete unit;   //clear unit
   unit = new ir::Unit();
-  llvmToGen(*unit, fileName, 0);  //suppose file exists and llvmToGen will 
not return false.
+  llvmToGen(*unit, fileName, module, 0);  //suppose file exists and 
llvmToGen will not return false.
 }
 assert(unit->getValid());
 this->buildFro

[Beignet] [PATCH] remove the code of saving the llvm bitcode to file, replace it with llvm::Module pointer.

2014-05-15 Thread xionghu . luo
From: Luo 

  Save the Act and module pointers in GenProgram and delete them in the
  destructor.
---
 backend/src/backend/gen_program.cpp | 31 +---
 backend/src/backend/gen_program.hpp |  4 +++-
 backend/src/backend/program.cpp | 47 +
 backend/src/backend/program.h   |  3 ++-
 backend/src/backend/program.hpp |  2 +-
 backend/src/llvm/llvm_to_gen.cpp| 12 ++
 backend/src/llvm/llvm_to_gen.hpp|  2 +-
 src/cl_program.c|  2 +-
 8 files changed, 55 insertions(+), 48 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 52db904..a311c71 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -22,6 +22,17 @@
  * \author Benjamin Segovia 
  */
 
+#include "llvm/Config/config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/DataLayout.h"
+#else
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/DataLayout.h"
+#endif  /* LLVM_VERSION_MINOR <= 2 */
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -33,6 +44,8 @@
 #include "ir/unit.hpp"
 #include "llvm/llvm_to_gen.hpp"
 
+#include 
+
 #include 
 #include 
 #include 
@@ -72,7 +85,17 @@ namespace gbe {
 fclose(f);
   }
 
-  GenProgram::~GenProgram(void) {}
+  GenProgram::~GenProgram(void){
+if(module){
+  delete (llvm::Module*)module;
+  module = NULL;
+}
+
+if(Act){
+  delete (clang::CodeGenAction*)Act;
+  Act = NULL;
+}
+  }
 
   /*! We must avoid spilling at all cost with Gen */
   static const struct CodeGenStrategy {
@@ -177,16 +200,18 @@ namespace gbe {
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
const char *fileName,
+   const void* module,
+   const void* act,
size_t stringSize,
char *err,
size_t *errSize,
int optLevel)
   {
 using namespace gbe;
-GenProgram *program = GBE_NEW(GenProgram, deviceID);
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, act);
 std::string error;
 // Try to compile the program
-if (program->buildFromLLVMFile(fileName, error, optLevel) == false) {
+if (program->buildFromLLVMFile(fileName, module, error, optLevel) == 
false) {
   if (err != NULL && errSize != NULL && stringSize > 0u) {
 const size_t msgSize = std::min(error.size(), stringSize-1u);
 std::memcpy(err, error.c_str(), msgSize);
diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ea54b49..b17dfc8 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -58,7 +58,7 @@ namespace gbe
   {
   public:
 /*! Create an empty program */
-GenProgram(uint32_t deviceID) : deviceID(deviceID) {}
+GenProgram(uint32_t deviceID, const void* mod = NULL, const void* act = 
NULL) : deviceID(deviceID),module((void*)mod), Act((void*)act) {}
 /*! Current device ID*/
 uint32_t deviceID;
 /*! Destroy the program */
@@ -69,6 +69,8 @@ namespace gbe
 virtual Kernel *allocateKernel(const std::string &name) {
   return GBE_NEW(GenKernel, name);
 }
+void* module;
+void* Act;
 /*! Use custom allocators */
 GBE_CLASS(GenProgram);
   };
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index bdc7d34..6745d70 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Config/config.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include 
 #include 
 #include 
@@ -102,9 +103,9 @@ namespace gbe {
 
   BVAR(OCL_OUTPUT_GEN_IR, false);
 
-  bool Program::buildFromLLVMFile(const char *fileName, std::string &error, 
int optLevel) {
+  bool Program::buildFromLLVMFile(const char *fileName, const void* module, 
std::string &error, int optLevel) {
 ir::Unit *unit = new ir::Unit();
-if (llvmToGen(*unit, fileName, optLevel) == false) {
+if (llvmToGen(*unit, fileName, module, optLevel) == false) {
   error = std::string(fileName) + " not found";
   return false;
 }
@@ -113,7 +114,7 @@ namespace gbe {
 if(!unit->getValid()) {
   delete unit;   //clear unit
   unit = new ir::Unit();
-  llvmToGen(*unit, fileName, 0);  //suppose file exists and llvmToGen will 
not return false.
+  llvmToGen(*unit, fileName, module, 0);  //suppose file exists and 
llvmToGen will not return false.
 }
 assert(unit->getValid());
 this->buildFro

[Beignet] [fix merge issue 1/2] add [opencl-1.2] API clCreateProgramWithBuiltInKernels.

2014-05-22 Thread xionghu . luo
From: Luo 

This API creates a built-in program object for a context, and loads the
built-in kernels into this program object.
---
 backend/src/ir/image.cpp |  5 
 src/cl_api.c | 24 
 src/cl_context.c |  8 ++
 src/cl_context.h |  2 ++
 src/cl_gt_device.h   | 24 +++-
 src/cl_program.c | 74 
 src/cl_program.h |  7 +
 7 files changed, 143 insertions(+), 1 deletion(-)

diff --git a/backend/src/ir/image.cpp b/backend/src/ir/image.cpp
index 8c34d70..87bafc0 100644
--- a/backend/src/ir/image.cpp
+++ b/backend/src/ir/image.cpp
@@ -125,7 +125,12 @@ namespace ir {
 
   void ImageSet::getData(struct ImageInfo *imageInfos) const {
   for(auto &it : regMap)
+  {
+int t = it.second->idx - gbe_get_image_base_index();
+if(t < 0)
+  continue;
 imageInfos[it.second->idx - gbe_get_image_base_index()] = *it.second;
+  }
   }
 
   ImageSet::~ImageSet() {
diff --git a/src/cl_api.c b/src/cl_api.c
index 4b1deda..3a77dcd 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -816,6 +816,30 @@ error:
 *errcode_ret = err;
   return program;
 }
+
+cl_program
+clCreateProgramWithBuiltInKernels(cl_context   context,
+  cl_uint  num_devices,
+  const cl_device_id * device_list,
+  const char * kernel_names,
+  cl_int * errcode_ret)
+{
+  cl_program program = NULL;
+  cl_int err = CL_SUCCESS;
+
+  CHECK_CONTEXT (context);
+  INVALID_VALUE_IF (kernel_names == NULL);
+  program = cl_program_create_with_built_in_kernles(context,
+num_devices,
+device_list,
+kernel_names,
+&err);
+error:
+  if (errcode_ret)
+*errcode_ret = err;
+  return program;
+}
+
 cl_int
 clRetainProgram(cl_program program)
 {
diff --git a/src/cl_context.c b/src/cl_context.c
index 293af94..6172ecc 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -206,8 +206,16 @@ cl_context_delete(cl_context ctx)
   cl_program_delete(ctx->internal_prgs[i]);
   ctx->internal_prgs[i] = NULL;
 }
+
+if (ctx->internel_kernels[i]) {
+  cl_kernel_delete(ctx->built_in_kernels[i]);
+  ctx->built_in_kernels[i] = NULL;
+}
   }
 
+  cl_program_delete(ctx->built_in_prgs);
+  ctx->built_in_prgs = NULL;
+
   /* All object lists should have been freed. Otherwise, the reference counter
* of the context cannot be 0
*/
diff --git a/src/cl_context.h b/src/cl_context.h
index 4de954c..e037634 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -103,6 +103,8 @@ struct _cl_context {
 /* All programs internal used, for example 
clEnqueuexxx api use */
   cl_kernel  internel_kernels[CL_INTERNAL_KERNEL_MAX];
 /* All kernels  for clenqueuexxx api, for 
example clEnqueuexxx api use */
+  cl_program built_in_prgs;  /*all built-in kernels belongs to this program 
only*/
+  cl_kernel  built_in_kernels[CL_INTERNAL_KERNEL_MAX];
   uint32_t ver; /* Gen version */
   struct _cl_context_prop props;
   cl_context_properties * prop_user; /* a copy of user passed context 
properties when create context */
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index 7e45b4e..3e2502c 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -75,7 +75,29 @@ DECL_INFO_STRING(version, LIBCL_VERSION_STRING)
 DECL_INFO_STRING(profile, "FULL_PROFILE")
 DECL_INFO_STRING(opencl_c_version, LIBCL_C_VERSION_STRING)
 DECL_INFO_STRING(extensions, "")
-DECL_INFO_STRING(built_in_kernels, "")
+DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;"
+   "__cl_copy_region_align16;"
+   "__cl_cpy_region_unalign_same_offset;"
+   "__cl_copy_region_unalign_dst_offset;"
+   "__cl_copy_region_unalign_src_offset;"
+   "__cl_copy_buffer_rect;"
+   "__cl_copy_image_2d_to_2d;"
+   "__cl_copy_image_3d_to_2d;"
+   "__cl_copy_image_2d_to_3d;"
+   "__cl_copy_image_3d_to_3d;"
+   "__cl_copy_image_2d_to_buffer;"
+   "__cl_copy_image_3d_to_buffer;"
+   "__cl_copy_buffer_to_image_2d;"
+   "__cl_copy_buffer_to_image_3d;"
+   "__cl_fill_region_unalign;"
+   "__cl_fill_region_align2;"
+

[Beignet] [fix merge issue 2/2] add[opencl-1.2] test case for API clCreateProgramWithBuiltInKernels.

2014-05-22 Thread xionghu . luo
From: Luo 

---
 utests/CMakeLists.txt   |  1 +
 utests/enqueue_built_in_kernels.cpp | 20 
 2 files changed, 21 insertions(+)
 create mode 100644 utests/enqueue_built_in_kernels.cpp

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index cc8c497..5f0649f 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -176,6 +176,7 @@ set (utests_sources
   enqueue_copy_buf.cpp
   enqueue_copy_buf_unaligned.cpp
   enqueue_fill_buf.cpp
+  enqueue_built_in_kernels.cpp
   utest_assert.cpp
   utest.cpp
   utest_file_map.cpp
diff --git a/utests/enqueue_built_in_kernels.cpp 
b/utests/enqueue_built_in_kernels.cpp
new file mode 100644
index 000..8b47bca
--- /dev/null
+++ b/utests/enqueue_built_in_kernels.cpp
@@ -0,0 +1,20 @@
+#include "utest_helper.hpp"
+
+void enqueue_built_in_kernels(void)
+{
+  char* built_in_kernel_names;
+  size_t built_in_kernels_size;
+  cl_int err = CL_SUCCESS;
+  size_t ret_sz;
+
+
+  OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, 
&built_in_kernels_size);
+  built_in_kernel_names = (char* )malloc(built_in_kernels_size * sizeof(char) 
);
+  OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 
built_in_kernels_size, (void*)built_in_kernel_names, &ret_sz);
+  OCL_ASSERT(ret_sz == built_in_kernels_size);
+  cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, 
&device, built_in_kernel_names, &err);
+  OCL_ASSERT(built_in_prog != NULL);
+
+}
+
+MAKE_UTEST_FROM_FUNCTION(enqueue_built_in_kernels);
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V2] remove the code of saving the llvm bitcode to file, replace it with llvm::Module

2014-05-22 Thread xionghu . luo
From: Luo 

Save the LLVMContext and module pointers in GenProgram and delete them in the
destructor.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 31 ---
 backend/src/backend/gen_program.hpp |  4 ++-
 backend/src/backend/program.cpp | 50 -
 backend/src/backend/program.h   |  3 ++-
 backend/src/backend/program.hpp |  2 +-
 backend/src/llvm/llvm_to_gen.cpp| 16 +++-
 backend/src/llvm/llvm_to_gen.hpp|  2 +-
 src/cl_program.c|  2 +-
 8 files changed, 62 insertions(+), 48 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 52db904..74b6fa1 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -22,6 +22,17 @@
  * \author Benjamin Segovia 
  */
 
+#include "llvm/Config/config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/DataLayout.h"
+#else
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/DataLayout.h"
+#endif  /* LLVM_VERSION_MINOR <= 2 */
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -33,6 +44,8 @@
 #include "ir/unit.hpp"
 #include "llvm/llvm_to_gen.hpp"
 
+#include 
+
 #include 
 #include 
 #include 
@@ -72,7 +85,17 @@ namespace gbe {
 fclose(f);
   }
 
-  GenProgram::~GenProgram(void) {}
+  GenProgram::~GenProgram(void){
+if(module){
+  delete (llvm::Module*)module;
+  module = NULL;
+}
+
+if(llvm_ctx){
+  delete (llvm::LLVMContext*)llvm_ctx;
+  llvm_ctx = NULL;
+}
+  }
 
   /*! We must avoid spilling at all cost with Gen */
   static const struct CodeGenStrategy {
@@ -177,16 +200,18 @@ namespace gbe {
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
const char *fileName,
+   const void* module,
+   const void* llvm_ctx,
size_t stringSize,
char *err,
size_t *errSize,
int optLevel)
   {
 using namespace gbe;
-GenProgram *program = GBE_NEW(GenProgram, deviceID);
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
 std::string error;
 // Try to compile the program
-if (program->buildFromLLVMFile(fileName, error, optLevel) == false) {
+if (program->buildFromLLVMFile(fileName, module, error, optLevel) == 
false) {
   if (err != NULL && errSize != NULL && stringSize > 0u) {
 const size_t msgSize = std::min(error.size(), stringSize-1u);
 std::memcpy(err, error.c_str(), msgSize);
diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ea54b49..70794c9 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -58,7 +58,7 @@ namespace gbe
   {
   public:
 /*! Create an empty program */
-GenProgram(uint32_t deviceID) : deviceID(deviceID) {}
+GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {}
 /*! Current device ID*/
 uint32_t deviceID;
 /*! Destroy the program */
@@ -69,6 +69,8 @@ namespace gbe
 virtual Kernel *allocateKernel(const std::string &name) {
   return GBE_NEW(GenKernel, name);
 }
+void* module;
+void* llvm_ctx;
 /*! Use custom allocators */
 GBE_CLASS(GenProgram);
   };
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index bdc7d34..66a5ce0 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -34,6 +34,8 @@
 #include "llvm/Config/config.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/IR/LLVMContext.h"
 #include 
 #include 
 #include 
@@ -102,9 +104,9 @@ namespace gbe {
 
   BVAR(OCL_OUTPUT_GEN_IR, false);
 
-  bool Program::buildFromLLVMFile(const char *fileName, std::string &error, 
int optLevel) {
+  bool Program::buildFromLLVMFile(const char *fileName, const void* module, 
std::string &error, int optLevel) {
 ir::Unit *unit = new ir::Unit();
-if (llvmToGen(*unit, fileName, optLevel) == false) {
+if (llvmToGen(*unit, fileName, module, optLevel) == false) {
   error = std::string(fileName) + " not found";
   return false;
 }
@@ -113,7 +115,7 @@ namespace gbe {
 if(!unit->getValid()) {
   delete unit;   //clear unit
   unit = new ir::Unit();
-  llvmToGen(*unit, fileName, 0);  //suppose file exists and llvmToGen will 
not return false.
+  llvmToGen(*unit, fileName, module, 0);  //suppose file exists and

[Beignet] [PATCH V1] remove the code of saving the llvm bitcode to file, replace it with llvm::Module

2014-05-28 Thread xionghu . luo
From: Luo 

Save the global LLVMContext and the module pointer in GenProgram; delete only
the module pointer in the destructor.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 30 ---
 backend/src/backend/gen_program.hpp |  4 +++-
 backend/src/backend/program.cpp | 48 +++--
 backend/src/backend/program.h   |  3 ++-
 backend/src/backend/program.hpp |  2 +-
 backend/src/llvm/llvm_to_gen.cpp| 16 -
 backend/src/llvm/llvm_to_gen.hpp|  2 +-
 src/cl_program.c|  2 +-
 8 files changed, 59 insertions(+), 48 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 52db904..7019060 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -22,6 +22,17 @@
  * \author Benjamin Segovia 
  */
 
+#include "llvm/Config/config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/DataLayout.h"
+#else
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/DataLayout.h"
+#endif  /* LLVM_VERSION_MINOR <= 2 */
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -33,6 +44,8 @@
 #include "ir/unit.hpp"
 #include "llvm/llvm_to_gen.hpp"
 
+#include 
+
 #include 
 #include 
 #include 
@@ -72,7 +85,16 @@ namespace gbe {
 fclose(f);
   }
 
-  GenProgram::~GenProgram(void) {}
+  GenProgram::~GenProgram(void){
+if(module){
+  delete (llvm::Module*)module;
+  module = NULL;
+}
+
+if(llvm_ctx){
+  llvm_ctx = NULL;
+}
+  }
 
   /*! We must avoid spilling at all cost with Gen */
   static const struct CodeGenStrategy {
@@ -177,16 +199,18 @@ namespace gbe {
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
const char *fileName,
+   const void* module,
+   const void* llvm_ctx,
size_t stringSize,
char *err,
size_t *errSize,
int optLevel)
   {
 using namespace gbe;
-GenProgram *program = GBE_NEW(GenProgram, deviceID);
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
 std::string error;
 // Try to compile the program
-if (program->buildFromLLVMFile(fileName, error, optLevel) == false) {
+if (program->buildFromLLVMFile(fileName, module, error, optLevel) == 
false) {
   if (err != NULL && errSize != NULL && stringSize > 0u) {
 const size_t msgSize = std::min(error.size(), stringSize-1u);
 std::memcpy(err, error.c_str(), msgSize);
diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ea54b49..70794c9 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -58,7 +58,7 @@ namespace gbe
   {
   public:
 /*! Create an empty program */
-GenProgram(uint32_t deviceID) : deviceID(deviceID) {}
+GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {}
 /*! Current device ID*/
 uint32_t deviceID;
 /*! Destroy the program */
@@ -69,6 +69,8 @@ namespace gbe
 virtual Kernel *allocateKernel(const std::string &name) {
   return GBE_NEW(GenKernel, name);
 }
+void* module;
+void* llvm_ctx;
 /*! Use custom allocators */
 GBE_CLASS(GenProgram);
   };
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index bdc7d34..f8e5d0f 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -34,6 +34,8 @@
 #include "llvm/Config/config.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/IR/LLVMContext.h"
 #include 
 #include 
 #include 
@@ -102,9 +104,9 @@ namespace gbe {
 
   BVAR(OCL_OUTPUT_GEN_IR, false);
 
-  bool Program::buildFromLLVMFile(const char *fileName, std::string &error, 
int optLevel) {
+  bool Program::buildFromLLVMFile(const char *fileName, const void* module, 
std::string &error, int optLevel) {
 ir::Unit *unit = new ir::Unit();
-if (llvmToGen(*unit, fileName, optLevel) == false) {
+if (llvmToGen(*unit, fileName, module, optLevel) == false) {
   error = std::string(fileName) + " not found";
   return false;
 }
@@ -113,7 +115,7 @@ namespace gbe {
 if(!unit->getValid()) {
   delete unit;   //clear unit
   unit = new ir::Unit();
-  llvmToGen(*unit, fileName, 0);  //suppose file exists and llvmToGen will 
not return false.
+  llvmToGen(*unit, fileName, module, 0);  //suppose file exists and 
llvmToGen will not

[Beignet] [PATCH 2/4] add [opencl-1.2] API clCompileProgram.

2014-05-28 Thread xionghu . luo
From: Luo 

This API compiles a program's source for all the devices or a specific
device in the OpenCL context associated with program.
The pre-processor runs before the program sources are compiled.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp |   1 -
 backend/src/backend/program.cpp | 158 +++-
 backend/src/backend/program.h   |  10 +++
 src/cl_api.c|  41 ++
 src/cl_program.c| 103 +++
 src/cl_program.h|   8 +-
 6 files changed, 318 insertions(+), 3 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 74b6fa1..7019060 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -92,7 +92,6 @@ namespace gbe {
 }
 
 if(llvm_ctx){
-  delete (llvm::LLVMContext*)llvm_ctx;
   llvm_ctx = NULL;
 }
   }
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 90306cc..18895cd 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -801,7 +801,161 @@ namespace gbe {
 gbe_program p;
 // will delete the module and llvm_ctx in the destructor of GenProgram.
 llvm::Module * out_module;
-llvm::LLVMContext* llvm_ctx = new llvm::LLVMContext;
+llvm::LLVMContext* llvm_ctx = &llvm::getGlobalContext();
+if (buildModuleFromSource(clName.c_str(), &out_module, llvm_ctx, 
clOpt.c_str(),
+  stringSize, err, errSize)) {
+// Now build the program from llvm
+  static std::mutex gbe_mutex;
+  gbe_mutex.lock();
+  size_t clangErrSize = 0;
+  if (err != NULL) {
+GBE_ASSERT(errSize != NULL);
+stringSize -= *errSize;
+err += *errSize;
+clangErrSize = *errSize;
+  }
+  p = gbe_program_new_from_llvm(deviceID, NULL, out_module, llvm_ctx, 
stringSize,
+err, errSize, optLevel);
+  if (err != NULL)
+*errSize += clangErrSize;
+  gbe_mutex.unlock();
+  if (OCL_OUTPUT_BUILD_LOG && options)
+llvm::errs() << options;
+} else
+  p = NULL;
+remove(clName.c_str());
+return p;
+  }
+
+  static gbe_program programCompileFromSource(uint32_t deviceID,
+  const char *source,
+  const char *temp_header_path,
+  size_t stringSize,
+  const char *options,
+  char *err,
+  size_t *errSize)
+  {
+char clStr[] = "/tmp/XX.cl";
+int clFd = mkstemps(clStr, 3);
+const std::string clName = std::string(clStr);
+std::string clOpt;
+
+FILE *clFile = fdopen(clFd, "w");
+FATAL_IF(clFile == NULL, "Failed to open temporary file");
+
+bool usePCH = OCL_USE_PCH;
+bool findPCH = false;
+
+/* Because our header file is so big, we want to avoid recompile the 
header from
+   scratch. We use the PCH support of Clang to save the huge compiling 
time.
+   We just use the most general build opt to build the PCH header file, so 
if
+   user pass new build options here, the PCH can not pass the Clang's 
compitable
+   validating. Clang will do three kinds of compatible check: Language 
Option,
+   Target Option and Preprocessing Option. Other kinds of options such as 
the
+   CodeGen options will not affect the AST result, so no need to check.
+
+   According to OpenCL 1.1's spec, the CL build options:
+   -D name=definition
+   If the definition is not used in our header, it is compitable
+
+   -cl-single-precision-constant
+   -cl-denorms-are-zero
+   -cl-std=
+   Language options, really affect.
+
+   -cl-opt-disable
+   -cl-mad-enable
+   -cl-no-signed-zeros
+   -cl-unsafe-math-optimizations
+   -cl-finite-math-only
+   -cl-fast-relaxed-math
+   CodeGen options, not affect
+
+   -Werror
+   -w
+   Our header should not block the compiling because of warning.
+
+   So we just disable the PCH validation of Clang and do the judgement by 
ourself. */
+
+if(options) {
+  char *p;
+  /* FIXME: Though we can disable the pch valid check, and load pch 
successfully,
+ but these language opts and pre-defined macro will still generate the 
diag msg
+ to the diag engine of the Clang and cause the Clang to report error.
+ We filter them all here to avoid these. */
+  const char * incompatible_opts[] = {
+  "-cl-single-precision-constant",
+//"-cl-denorms-are-zero",
+  "-cl-fast-relaxed-math",
+  "-cl-std=",
+  };
+  const char * incompatible_defs[] = {
+  "GET_FLOAT_WORD",
+  "__NV_CL_C_VERSION",
+  "GEN7_SAMPLER_CLAMP_BORDER_W

[Beignet] [PATCH 3/4] add [opencl-1.2] API clLinkProgram.

2014-05-28 Thread xionghu . luo
From: Luo 

This API links a set of compiled program objects and libraries for all
the devices or a specific device(s) in the OpenCL context and creates
an executable.
The LLVM bitcode in the compiled program objects is linked together and
built into a Gen binary.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 116 
 backend/src/backend/program.cpp |  23 ++-
 backend/src/backend/program.h   |  28 +
 src/cl_api.c|  33 ++
 src/cl_program.c|  69 +++--
 src/cl_program.h|   7 +++
 6 files changed, 257 insertions(+), 19 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 7019060..dc885d5 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -33,6 +33,9 @@
 #include "llvm/IR/DataLayout.h"
 #endif  /* LLVM_VERSION_MINOR <= 2 */
 
+#include "llvm/Linker.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -222,6 +225,116 @@ namespace gbe {
 // Everything run fine
 return (gbe_program) program;
   }
+
+  static gbe_program genProgramNewGenProgram(uint32_t deviceID, const void* 
module, const void* llvm_ctx)
+  {
+using namespace gbe;
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
+// Everything run fine
+return (gbe_program) program;
+  }
+
+  static void genProgramLinkFromLLVM(gbe_program   dst_program,
+ gbe_program   src_program,
+ size_tstringSize,
+ char *err,
+ size_t *  errSize)
+  {
+using namespace gbe;
+std::string errMsg;
+if(((GenProgram*)dst_program)->module == NULL){
+  ((GenProgram*)dst_program)->module = 
llvm::CloneModule((llvm::Module*)((GenProgram*)src_program)->module);
+  errSize = 0;
+}else{
+  llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module;
+  llvm::GlobalVariable* gv = src->getNamedGlobal("PIo2");
+  gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  gv = src->getNamedGlobal("npio2_hw");
+  gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  gv = src->getNamedGlobal("two_over_pi");
+  gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  gv = src->getNamedGlobal("atanhi");
+  gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  gv = src->getNamedGlobal("atanlo");
+  gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+
+  llvm::Function* fc = src->getFunction("barrier");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memset_p");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memset_g");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memset_l");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_gg");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_gp");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_gl");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_pg");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_pp");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_pl");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_lg");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_lp");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_ll");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+
+  llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module;
+  llvm::Linker::LinkModules( dst,
+ src,
+ llvm::Linker::PreserveSource,
+ &errMsg);
+  if (errMsg.c_str() != NULL) {
+if (err != NULL && errSize != NULL && stringSize > 0u) {
+  if(errMsg.length() < stringSize )
+stringSize = errMsg.length();
+  strcpy(err, errMsg.c_str());
+  err[stringSize+1] = '\0';
+}
+  }
+  printf("%s\n", err);
+}
+// Everything run fine
+  }
+
+  static void genProgramBuildFromLLVM(gbe_program program,
+  size_t stringSize,
+  

[Beignet] [PATCH 4/4] add [opencl-1.2] test case runtime_cl.

2014-05-28 Thread xionghu . luo
From: Luo 

---
 utests/CMakeLists.txt   |   1 +
 utests/runtime_compile_link.cpp | 127 
 2 files changed, 128 insertions(+)
 create mode 100644 utests/runtime_compile_link.cpp

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 5f0649f..c6d4098 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -153,6 +153,7 @@ set (utests_sources
   runtime_createcontext.cpp
   runtime_null_kernel_arg.cpp
   runtime_event.cpp
+  runtime_compile_link.cpp
   compiler_double.cpp
   compiler_double_2.cpp
   compiler_double_3.cpp
diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp
new file mode 100644
index 000..df55ab8
--- /dev/null
+++ b/utests/runtime_compile_link.cpp
@@ -0,0 +1,127 @@
+#include 
+#include 
+#include 
+#include "utest_helper.hpp"
+#include "utest_file_map.hpp"
+
+#define BUFFERSIZE  32*1024
+
+int init_program(const char* name, cl_context ctx, cl_program *pg )
+{
+  cl_int err;
+  char* ker_path = cl_do_kiss_path(name, device);
+
+  cl_file_map_t *fm = cl_file_map_new();
+  err = cl_file_map_open(fm, ker_path);
+  if(err != CL_FILE_MAP_SUCCESS)
+OCL_ASSERT(0);
+  const char *src = cl_file_map_begin(fm);
+
+  *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err);
+  free(ker_path);
+  cl_file_map_delete(fm);
+  return 0;
+
+}
+
+void runtime_cl(void)
+{
+
+  cl_int err;
+
+  const char* header_file_name="multi2.h";
+  cl_program foo_pg;
+  init_program(header_file_name, ctx, &foo_pg);
+
+  const char* myinc_file_name="mydir/multi3.h";
+  cl_program myinc_pg;
+  init_program(myinc_file_name, ctx, &myinc_pg);
+
+  const char* file_name_A="multi_A.cl";
+  cl_program program_A;
+  init_program(file_name_A, ctx, &program_A);
+
+  cl_program input_headers[2] = { foo_pg, myinc_pg};
+  const char * input_header_names[2] = { "multi2.h", "mydir/multi3.h"};
+
+  err = clCompileProgram(program_A,
+0, NULL, // num_devices & device_list
+NULL, // compile_options
+2, // num_input_headers
+input_headers,
+input_header_names,
+NULL, NULL);
+
+  OCL_ASSERT(err==CL_SUCCESS);
+  const char* file_name_B="multi_B.cl";
+  cl_program program_B;
+  init_program(file_name_B, ctx, &program_B);
+
+  err = clCompileProgram(program_B,
+0, NULL, // num_devices & device_list
+NULL, // compile_options
+2, // num_input_headers
+input_headers,
+input_header_names,
+NULL, NULL);
+
+  OCL_ASSERT(err==CL_SUCCESS);
+  cl_program input_programs[2] = { program_A, program_B};
+  cl_program linked_program = clLinkProgram(ctx, 0, NULL, NULL, 2, 
input_programs, NULL, NULL, &err);
+
+
+  OCL_ASSERT(linked_program != NULL);
+  OCL_ASSERT(err == CL_SUCCESS);
+
+  // link success, run this kernel.
+
+  const size_t n = 16;
+  int64_t src1[n], src2[n];
+
+  src1[0] = (int64_t)1 << 63, src2[0] = 0x7FFFll;
+  src1[1] = (int64_t)1 << 63, src2[1] = ((int64_t)1 << 63) | 1;
+  src1[2] = -1ll, src2[2] = 0;
+  src1[3] = ((int64_t)123 << 32) | 0x7FFF, src2[3] = ((int64_t)123 << 32) 
| 0x8000;
+  src1[4] = 0x7FFFll, src2[4] = (int64_t)1 << 63;
+  src1[5] = ((int64_t)1 << 63) | 1, src2[5] = (int64_t)1 << 63;
+  src1[6] = 0, src2[6] = -1ll;
+  src1[7] = ((int64_t)123 << 32) | 0x8000, src2[7] = ((int64_t)123 << 32) 
| 0x7FFF;
+  for(size_t i=8; ihttp://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/4] [opencl-1.2]remove the code of saving the llvm bitcode to file, replace it with module pointer.

2014-05-28 Thread xionghu . luo
From: Luo 

Save the global LLVMContext and module pointer in GenProgram, and delete
the module pointer in the destructor.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 31 +---
 backend/src/backend/gen_program.hpp |  4 +++-
 backend/src/backend/program.cpp | 47 ++---
 backend/src/backend/program.h   |  3 ++-
 backend/src/backend/program.hpp |  2 +-
 backend/src/llvm/llvm_to_gen.cpp| 16 -
 backend/src/llvm/llvm_to_gen.hpp|  2 +-
 src/cl_program.c|  2 +-
 8 files changed, 59 insertions(+), 48 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 52db904..74b6fa1 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -22,6 +22,17 @@
  * \author Benjamin Segovia 
  */
 
+#include "llvm/Config/config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/DataLayout.h"
+#else
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/DataLayout.h"
+#endif  /* LLVM_VERSION_MINOR <= 2 */
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -33,6 +44,8 @@
 #include "ir/unit.hpp"
 #include "llvm/llvm_to_gen.hpp"
 
+#include 
+
 #include 
 #include 
 #include 
@@ -72,7 +85,17 @@ namespace gbe {
 fclose(f);
   }
 
-  GenProgram::~GenProgram(void) {}
+  GenProgram::~GenProgram(void){
+if(module){
+  delete (llvm::Module*)module;
+  module = NULL;
+}
+
+if(llvm_ctx){
+  delete (llvm::LLVMContext*)llvm_ctx;
+  llvm_ctx = NULL;
+}
+  }
 
   /*! We must avoid spilling at all cost with Gen */
   static const struct CodeGenStrategy {
@@ -177,16 +200,18 @@ namespace gbe {
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
const char *fileName,
+   const void* module,
+   const void* llvm_ctx,
size_t stringSize,
char *err,
size_t *errSize,
int optLevel)
   {
 using namespace gbe;
-GenProgram *program = GBE_NEW(GenProgram, deviceID);
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
 std::string error;
 // Try to compile the program
-if (program->buildFromLLVMFile(fileName, error, optLevel) == false) {
+if (program->buildFromLLVMFile(fileName, module, error, optLevel) == 
false) {
   if (err != NULL && errSize != NULL && stringSize > 0u) {
 const size_t msgSize = std::min(error.size(), stringSize-1u);
 std::memcpy(err, error.c_str(), msgSize);
diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ea54b49..70794c9 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -58,7 +58,7 @@ namespace gbe
   {
   public:
 /*! Create an empty program */
-GenProgram(uint32_t deviceID) : deviceID(deviceID) {}
+GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {}
 /*! Current device ID*/
 uint32_t deviceID;
 /*! Destroy the program */
@@ -69,6 +69,8 @@ namespace gbe
 virtual Kernel *allocateKernel(const std::string &name) {
   return GBE_NEW(GenKernel, name);
 }
+void* module;
+void* llvm_ctx;
 /*! Use custom allocators */
 GBE_CLASS(GenProgram);
   };
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index bdc7d34..90306cc 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -34,6 +34,8 @@
 #include "llvm/Config/config.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/IR/LLVMContext.h"
 #include 
 #include 
 #include 
@@ -102,9 +104,9 @@ namespace gbe {
 
   BVAR(OCL_OUTPUT_GEN_IR, false);
 
-  bool Program::buildFromLLVMFile(const char *fileName, std::string &error, 
int optLevel) {
+  bool Program::buildFromLLVMFile(const char *fileName, const void* module, 
std::string &error, int optLevel) {
 ir::Unit *unit = new ir::Unit();
-if (llvmToGen(*unit, fileName, optLevel) == false) {
+if (llvmToGen(*unit, fileName, module, optLevel) == false) {
   error = std::string(fileName) + " not found";
   return false;
 }
@@ -113,7 +115,7 @@ namespace gbe {
 if(!unit->getValid()) {
   delete unit;   //clear unit
   unit = new ir::Unit();
-  llvmToGen(*unit, fileName, 0);  //suppose file exists and llvmToGen will 
not return false.
+  llvmToGen(*unit, fileName, module, 0);  

[Beignet] [PATCH V2] remove the code of saving the llvm bitcode to file, replace it with llvm::Module

2014-06-04 Thread xionghu . luo
From: Luo 

Save the global LLVMContext and module pointer in GenProgram, and delete the
module pointer in the destructor.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 33 +++--
 backend/src/backend/gen_program.hpp |  4 ++-
 backend/src/backend/program.cpp | 59 -
 backend/src/backend/program.h   |  3 +-
 backend/src/backend/program.hpp |  2 +-
 backend/src/llvm/llvm_to_gen.cpp| 16 ++
 backend/src/llvm/llvm_to_gen.hpp|  2 +-
 src/cl_program.c|  2 +-
 8 files changed, 73 insertions(+), 48 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index d2e95d4..33f2ed6 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -22,6 +22,17 @@
  * \author Benjamin Segovia 
  */
 
+#include "llvm/Config/config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/DataLayout.h"
+#else
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/DataLayout.h"
+#endif  /* LLVM_VERSION_MINOR <= 2 */
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -33,6 +44,8 @@
 #include "ir/unit.hpp"
 #include "llvm/llvm_to_gen.hpp"
 
+#include 
+
 #include 
 #include 
 #include 
@@ -74,7 +87,19 @@ namespace gbe {
 #endif
   }
 
-  GenProgram::~GenProgram(void) {}
+  GenProgram::~GenProgram(void){
+#ifdef GBE_COMPILER_AVAILABLE
+if(module){
+  delete (llvm::Module*)module;
+  module = NULL;
+}
+
+if(llvm_ctx){
+  delete (llvm::LLVMContext*)llvm_ctx;
+  llvm_ctx = NULL;
+}
+#endif
+  }
 
   /*! We must avoid spilling at all cost with Gen */
   static const struct CodeGenStrategy {
@@ -182,17 +207,19 @@ namespace gbe {
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
const char *fileName,
+   const void* module,
+   const void* llvm_ctx,
size_t stringSize,
char *err,
size_t *errSize,
int optLevel)
   {
 using namespace gbe;
-GenProgram *program = GBE_NEW(GenProgram, deviceID);
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
 #ifdef GBE_COMPILER_AVAILABLE
 std::string error;
 // Try to compile the program
-if (program->buildFromLLVMFile(fileName, error, optLevel) == false) {
+if (program->buildFromLLVMFile(fileName, module, error, optLevel) == 
false) {
   if (err != NULL && errSize != NULL && stringSize > 0u) {
 const size_t msgSize = std::min(error.size(), stringSize-1u);
 std::memcpy(err, error.c_str(), msgSize);
diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ea54b49..70794c9 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -58,7 +58,7 @@ namespace gbe
   {
   public:
 /*! Create an empty program */
-GenProgram(uint32_t deviceID) : deviceID(deviceID) {}
+GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {}
 /*! Current device ID*/
 uint32_t deviceID;
 /*! Destroy the program */
@@ -69,6 +69,8 @@ namespace gbe
 virtual Kernel *allocateKernel(const std::string &name) {
   return GBE_NEW(GenKernel, name);
 }
+void* module;
+void* llvm_ctx;
 /*! Use custom allocators */
 GBE_CLASS(GenProgram);
   };
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 949aeb4..26d9454 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -34,6 +34,8 @@
 #include "llvm/Config/config.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/IR/LLVMContext.h"
 #include 
 #include 
 #include 
@@ -103,9 +105,13 @@ namespace gbe {
 #ifdef GBE_COMPILER_AVAILABLE
   BVAR(OCL_OUTPUT_GEN_IR, false);
 
-  bool Program::buildFromLLVMFile(const char *fileName, std::string &error, 
int optLevel) {
+  bool Program::buildFromLLVMFile(const char *fileName, const void* module, 
std::string &error, int optLevel) {
 ir::Unit *unit = new ir::Unit();
-if (llvmToGen(*unit, fileName, optLevel) == false) {
+llvm::Module * cloned_module = NULL;
+if(module){
+  cloned_module = llvm::CloneModule((llvm::Module*)module);
+}
+if (llvmToGen(*unit, fileName, module, optLevel) == false) {
   error = std::string(fileName) + " not found";
   return false;
 }
@@ -114,11 +120,18 @@ namespace gbe {
 if(!unit->getValid

[Beignet] [PATCH V2 2/3] add [opencl-1.2] API clLinkProgram.

2014-06-05 Thread xionghu . luo
From: Luo 

This API links a set of compiled program objects and libraries for all
the devices or a specific device(s) in the OpenCL context and creates
an executable.
The LLVM bitcode in the compiled program objects is linked together and
built into a Gen binary.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 120 
 backend/src/backend/program.cpp |  33 +++---
 backend/src/backend/program.h   |  28 +
 src/cl_api.c|  33 ++
 src/cl_gbe_loader.cpp   |  12 
 src/cl_program.c|  69 -
 src/cl_program.h|   7 +++
 7 files changed, 277 insertions(+), 25 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 33f2ed6..d7cb898 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -33,6 +33,9 @@
 #include "llvm/IR/DataLayout.h"
 #endif  /* LLVM_VERSION_MINOR <= 2 */
 
+#include "llvm/Linker.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -232,6 +235,120 @@ namespace gbe {
 // Everything run fine
 return (gbe_program) program;
   }
+
+  static gbe_program genProgramNewGenProgram(uint32_t deviceID, const void* 
module, const void* llvm_ctx)
+  {
+using namespace gbe;
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
+// Everything run fine
+return (gbe_program) program;
+  }
+
+  static void genProgramLinkFromLLVM(gbe_program   dst_program,
+ gbe_program   src_program,
+ size_tstringSize,
+ char *err,
+ size_t *  errSize)
+  {
+#ifdef GBE_COMPILER_AVAILABLE
+using namespace gbe;
+std::string errMsg;
+if(((GenProgram*)dst_program)->module == NULL){
+  ((GenProgram*)dst_program)->module = 
llvm::CloneModule((llvm::Module*)((GenProgram*)src_program)->module);
+  errSize = 0;
+}else{
+  llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module;
+  llvm::GlobalVariable* gv = src->getNamedGlobal("PIo2");
+  gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  gv = src->getNamedGlobal("npio2_hw");
+  gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  gv = src->getNamedGlobal("two_over_pi");
+  gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  gv = src->getNamedGlobal("atanhi");
+  gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  gv = src->getNamedGlobal("atanlo");
+  gv->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+
+  llvm::Function* fc = src->getFunction("barrier");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memset_p");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memset_g");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memset_l");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_gg");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_gp");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_gl");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_pg");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_pp");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_pl");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_lg");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_lp");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  fc = src->getFunction("__gen_memcpy_ll");
+  fc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+
+  llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module;
+  llvm::Linker::LinkModules( dst,
+ src,
+ llvm::Linker::PreserveSource,
+ &errMsg);
+  if (errMsg.c_str() != NULL) {
+if (err != NULL && errSize != NULL && stringSize > 0u) {
+  if(errMsg.length() < stringSize )
+stringSize = errMsg.length();
+  strcpy(err, errMsg.c_str());
+  err[stringSize+1] = '\0';
+}
+  }
+  printf("%s\n", err);
+}
+// Everything run fine
+#endif
+  }
+
+  static void genProgramBuildFromLLVM(gbe_prog

[Beignet] [PATCH V2 3/3] add [opencl-1.2] test case runtime_cl.

2014-06-05 Thread xionghu . luo
From: Luo 

Signed-off-by: Luo 
---
 kernels/multi2.h|   1 +
 kernels/multi_A.cl  |  13 
 kernels/multi_B.cl  |   9 +++
 kernels/mydir/multi3.h  |   4 ++
 utests/CMakeLists.txt   |   1 +
 utests/runtime_compile_link.cpp | 127 
 6 files changed, 155 insertions(+)
 create mode 100644 kernels/multi2.h
 create mode 100644 kernels/multi_A.cl
 create mode 100644 kernels/multi_B.cl
 create mode 100644 kernels/mydir/multi3.h
 create mode 100644 utests/runtime_compile_link.cpp

diff --git a/kernels/multi2.h b/kernels/multi2.h
new file mode 100644
index 000..ae2c56e
--- /dev/null
+++ b/kernels/multi2.h
@@ -0,0 +1 @@
+int comp_long(long x, long y);
diff --git a/kernels/multi_A.cl b/kernels/multi_A.cl
new file mode 100644
index 000..9282b8d
--- /dev/null
+++ b/kernels/multi_A.cl
@@ -0,0 +1,13 @@
+#include "multi2.h"
+#include "mydir/multi3.h"
+
+int comp_long(long x, long y)
+{
+  return x < y ;
+}
+
+kernel void multi_A(global long *src1, global long *src2, global long *dst) {
+  int i = get_global_id(0);
+  int j = comp_long(src1[i], src2[i]);
+  dst[i] = j ? 3 : 4;
+}
diff --git a/kernels/multi_B.cl b/kernels/multi_B.cl
new file mode 100644
index 000..de147eb
--- /dev/null
+++ b/kernels/multi_B.cl
@@ -0,0 +1,9 @@
+#include "multi2.h"
+#include "mydir/multi3.h"
+
+kernel void multi_B(global long *src1, global long *src2, global long *dst) {
+  int i = get_global_id(0);
+  int j = comp_long(src1[i], src2[i]);
+  dst[i] = j ? 3 : 4;
+  int k = greater(src1[i], src2[i]);
+}
diff --git a/kernels/mydir/multi3.h b/kernels/mydir/multi3.h
new file mode 100644
index 000..4011278
--- /dev/null
+++ b/kernels/mydir/multi3.h
@@ -0,0 +1,4 @@
+inline int greater(long x, long y)
+{
+  return x > y ;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 698c9ff..bee3e8f 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -157,6 +157,7 @@ set (utests_sources
   runtime_event.cpp
   runtime_barrier_list.cpp
   runtime_marker_list.cpp
+  runtime_compile_link.cpp
   compiler_double.cpp
   compiler_double_2.cpp
   compiler_double_3.cpp
diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp
new file mode 100644
index 000..df55ab8
--- /dev/null
+++ b/utests/runtime_compile_link.cpp
@@ -0,0 +1,127 @@
+#include 
+#include 
+#include 
+#include "utest_helper.hpp"
+#include "utest_file_map.hpp"
+
+#define BUFFERSIZE  32*1024
+
+int init_program(const char* name, cl_context ctx, cl_program *pg )
+{
+  cl_int err;
+  char* ker_path = cl_do_kiss_path(name, device);
+
+  cl_file_map_t *fm = cl_file_map_new();
+  err = cl_file_map_open(fm, ker_path);
+  if(err != CL_FILE_MAP_SUCCESS)
+OCL_ASSERT(0);
+  const char *src = cl_file_map_begin(fm);
+
+  *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err);
+  free(ker_path);
+  cl_file_map_delete(fm);
+  return 0;
+
+}
+
+void runtime_cl(void)
+{
+
+  cl_int err;
+
+  const char* header_file_name="multi2.h";
+  cl_program foo_pg;
+  init_program(header_file_name, ctx, &foo_pg);
+
+  const char* myinc_file_name="mydir/multi3.h";
+  cl_program myinc_pg;
+  init_program(myinc_file_name, ctx, &myinc_pg);
+
+  const char* file_name_A="multi_A.cl";
+  cl_program program_A;
+  init_program(file_name_A, ctx, &program_A);
+
+  cl_program input_headers[2] = { foo_pg, myinc_pg};
+  const char * input_header_names[2] = { "multi2.h", "mydir/multi3.h"};
+
+  err = clCompileProgram(program_A,
+0, NULL, // num_devices & device_list
+NULL, // compile_options
+2, // num_input_headers
+input_headers,
+input_header_names,
+NULL, NULL);
+
+  OCL_ASSERT(err==CL_SUCCESS);
+  const char* file_name_B="multi_B.cl";
+  cl_program program_B;
+  init_program(file_name_B, ctx, &program_B);
+
+  err = clCompileProgram(program_B,
+0, NULL, // num_devices & device_list
+NULL, // compile_options
+2, // num_input_headers
+input_headers,
+input_header_names,
+NULL, NULL);
+
+  OCL_ASSERT(err==CL_SUCCESS);
+  cl_program input_programs[2] = { program_A, program_B};
+  cl_program linked_program = clLinkProgram(ctx, 0, NULL, NULL, 2, 
input_programs, NULL, NULL, &err);
+
+
+  OCL_ASSERT(linked_program != NULL);
+  OCL_ASSERT(err == CL_SUCCESS);
+
+  // link success, run this kernel.
+
+  const size_t n = 16;
+  int64_t src1[n], src2[n];
+
+  src1[0] = (int64_t)1 << 63, src2[0] = 0x7FFFll;
+  src1[1] = (int64_t)1 << 63, src2[1] = ((int64_t)1 << 63) | 1;
+  src1[2] = -1ll, src2[2] = 0;
+  src1[3] = ((int64_t)123 << 32) | 0x7FFF, src2[3] = ((int64_t)123 << 32) 
| 0x8000;
+

[Beignet] [PATCH V2 0/3] opencl-1.2 compile/link implementation.

2014-06-05 Thread xionghu . luo
From: Luo 

1. This patchset depends on the patch from master called "remove the code of 
saving the llvm bitcode to file, replace it with llvm::Module".
2. The clBuildProgram path is independent of clCompileProgram/clLinkProgram: 
clBuildProgram allocates a new LLVMContext, while compile/link use the global 
LLVMContext.

Luo (3):
  add [opencl-1.2] API clCompileProgram.
  add [opencl-1.2] API clLinkProgram.
  add [opencl-1.2] test case runtime_cl.

 backend/src/backend/gen_program.cpp | 120 
 backend/src/backend/program.cpp | 178 
 backend/src/backend/program.h   |  38 
 kernels/multi2.h|   1 +
 kernels/multi_A.cl  |  13 +++
 kernels/multi_B.cl  |   9 ++
 kernels/mydir/multi3.h  |   4 +
 src/cl_api.c|  74 +++
 src/cl_gbe_loader.cpp   |  16 
 src/cl_program.c| 138 
 src/cl_program.h|  15 ++-
 utests/CMakeLists.txt   |   1 +
 utests/runtime_compile_link.cpp | 127 +
 13 files changed, 733 insertions(+), 1 deletion(-)
 create mode 100644 kernels/multi2.h
 create mode 100644 kernels/multi_A.cl
 create mode 100644 kernels/multi_B.cl
 create mode 100644 kernels/mydir/multi3.h
 create mode 100644 utests/runtime_compile_link.cpp

-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V2 1/3] add [opencl-1.2] API clCompileProgram.

2014-06-05 Thread xionghu . luo
From: Luo 

This API compiles a program's source for all the devices or a specific
device in the OpenCL context associated with program.
The pre-processor runs before the program sources are compiled.

Signed-off-by: Luo 
---
 backend/src/backend/program.cpp | 161 
 backend/src/backend/program.h   |  10 +++
 src/cl_api.c|  41 ++
 src/cl_gbe_loader.cpp   |   4 +
 src/cl_program.c| 103 +
 src/cl_program.h|   8 +-
 6 files changed, 326 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index fab6bce..5af66fb 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -842,6 +842,165 @@ namespace gbe {
   }
 #endif
 
+#ifdef GBE_COMPILER_AVAILABLE
+  static gbe_program programCompileFromSource(uint32_t deviceID,
+  const char *source,
+  const char *temp_header_path,
+  size_t stringSize,
+  const char *options,
+  char *err,
+  size_t *errSize)
+  {
+char clStr[] = "/tmp/XX.cl";
+int clFd = mkstemps(clStr, 3);
+const std::string clName = std::string(clStr);
+std::string clOpt;
+
+FILE *clFile = fdopen(clFd, "w");
+FATAL_IF(clFile == NULL, "Failed to open temporary file");
+
+bool usePCH = OCL_USE_PCH;
+bool findPCH = false;
+
+/* Because our header file is so big, we want to avoid recompile the 
header from
+   scratch. We use the PCH support of Clang to save the huge compiling 
time.
+   We just use the most general build opt to build the PCH header file, so 
if
+   user pass new build options here, the PCH can not pass the Clang's 
compitable
+   validating. Clang will do three kinds of compatible check: Language 
Option,
+   Target Option and Preprocessing Option. Other kinds of options such as 
the
+   CodeGen options will not affect the AST result, so no need to check.
+
+   According to OpenCL 1.1's spec, the CL build options:
+   -D name=definition
+   If the definition is not used in our header, it is compitable
+
+   -cl-single-precision-constant
+   -cl-denorms-are-zero
+   -cl-std=
+   Language options, really affect.
+
+   -cl-opt-disable
+   -cl-mad-enable
+   -cl-no-signed-zeros
+   -cl-unsafe-math-optimizations
+   -cl-finite-math-only
+   -cl-fast-relaxed-math
+   CodeGen options, not affect
+
+   -Werror
+   -w
+   Our header should not block the compiling because of warning.
+
+   So we just disable the PCH validation of Clang and do the judgement by 
ourself. */
+
+if(options) {
+  char *p;
+  /* FIXME: Though we can disable the pch valid check, and load pch 
successfully,
+ but these language opts and pre-defined macro will still generate the 
diag msg
+ to the diag engine of the Clang and cause the Clang to report error.
+ We filter them all here to avoid these. */
+  const char * incompatible_opts[] = {
+  "-cl-single-precision-constant",
+//"-cl-denorms-are-zero",
+  "-cl-fast-relaxed-math",
+  "-cl-std=",
+  };
+  const char * incompatible_defs[] = {
+  "GET_FLOAT_WORD",
+  "__NV_CL_C_VERSION",
+  "GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND"
+  };
+
+  for (unsigned int i = 0; i < sizeof(incompatible_opts)/sizeof(char *); 
i++ ) {
+p = strstr(const_cast(options), incompatible_opts[i]);
+if (p) {
+  usePCH = false;
+  break;
+}
+  }
+
+  if (usePCH) {
+for (unsigned int i = 0; i < sizeof(incompatible_defs)/sizeof(char *); 
i++ ) {
+  p = strstr(const_cast(options), incompatible_defs[i]);
+  if (p) {
+usePCH = false;
+break;
+  }
+}
+  }
+
+
+  clOpt += options;
+}
+
+std::string dirs = OCL_PCH_PATH;
+std::istringstream idirs(dirs);
+std::string pchFileName;
+
+while (getline(idirs, pchFileName, ':')) {
+  if(access(pchFileName.c_str(), R_OK) == 0) {
+findPCH = true;
+break;
+  }
+}
+
+if (usePCH && findPCH) {
+  clOpt += " -include-pch ";
+  clOpt += pchFileName;
+  clOpt += " ";
+} else
+  fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 1, 
clFile);
+
+if (!OCL_STRICT_CONFORMANCE) {
+fwrite(ocl_mathfunc_fastpath_str.c_str(), 
strlen(ocl_mathfunc_fastpath_str.c_str()), 1, clFile);
+}
+
+//for clCompilerProgram usage.
+if(temp_header_path){
+  clOpt += " -I ";
+  clOpt += temp_header_path;
+  clOpt += " ";
+}
+
+// reset the file number in case we have inse

[Beignet] [PATCH V3 2/3] add [opencl-1.2] API clLinkProgram.

2014-06-05 Thread xionghu . luo
From: Luo 

This API links a set of compiled program objects and libraries for all
the devices or a specific device(s) in the OpenCL context and creates
an executable.
The LLVM bitcode in the compiled program objects is linked together and
built into a Gen binary.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 95 +
 backend/src/backend/program.cpp | 28 +--
 backend/src/backend/program.h   | 28 +++
 src/cl_api.c| 33 +
 src/cl_gbe_loader.cpp   | 12 +
 src/cl_program.c| 69 ---
 src/cl_program.h|  7 +++
 7 files changed, 251 insertions(+), 21 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 33f2ed6..bb1b4df 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -33,6 +33,9 @@
 #include "llvm/IR/DataLayout.h"
 #endif  /* LLVM_VERSION_MINOR <= 2 */
 
+#include "llvm/Linker.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -51,6 +54,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -232,6 +236,94 @@ namespace gbe {
 // Everything run fine
 return (gbe_program) program;
   }
+
+  static gbe_program genProgramNewGenProgram(uint32_t deviceID, const void* 
module, const void* llvm_ctx)
+  {
+using namespace gbe;
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
+// Everything run fine
+return (gbe_program) program;
+  }
+
+  static void genProgramLinkFromLLVM(gbe_program   dst_program,
+ gbe_program   src_program,
+ size_tstringSize,
+ char *err,
+ size_t *  errSize)
+  {
+#ifdef GBE_COMPILER_AVAILABLE
+using namespace gbe;
+std::string errMsg;
+if(((GenProgram*)dst_program)->module == NULL){
+  ((GenProgram*)dst_program)->module = 
llvm::CloneModule((llvm::Module*)((GenProgram*)src_program)->module);
+  errSize = 0;
+}else{
+  //set the global variables and functions to link once to fix redefine.
+  llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module;
+  for (llvm::Module::global_iterator I = src->global_begin(), E = 
src->global_end(); I != E; ++I) {
+I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  }
+
+  for (llvm::Module::iterator I = src->begin(), E = src->end(); I != E; 
++I) {
+I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  }
+
+  llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module;
+  llvm::Linker::LinkModules( dst,
+ src,
+ llvm::Linker::PreserveSource,
+ &errMsg);
+  if (errMsg.c_str() != NULL) {
+if (err != NULL && errSize != NULL && stringSize > 0u) {
+  if(errMsg.length() < stringSize )
+stringSize = errMsg.length();
+  strcpy(err, errMsg.c_str());
+  err[stringSize+1] = '\0';
+}
+  }
+  printf("%s\n", err);
+}
+// Everything run fine
+#endif
+  }
+
+  static void genProgramBuildFromLLVM(gbe_program program,
+  size_t stringSize,
+  char *err,
+  size_t *errSize,
+  const char *  options)
+  {
+#ifdef GBE_COMPILER_AVAILABLE
+using namespace gbe;
+std::string error;
+
+int optLevel = 1;
+
+if(options) {
+  char *p;
+  p = strstr(const_cast(options), "-cl-opt-disable");
+  if (p)
+optLevel = 0;
+}
+
+GenProgram* p = (GenProgram*) program;
+// Try to compile the program
+static std::mutex gbe_mutex;
+gbe_mutex.lock();
+llvm::Module* module = (llvm::Module*)p->module;
+
+if (p->buildFromLLVMFile(NULL, module, error, optLevel) == false) {
+  if (err != NULL && errSize != NULL && stringSize > 0u) {
+const size_t msgSize = std::min(error.size(), stringSize-1u);
+std::memcpy(err, error.c_str(), msgSize);
+*errSize = error.size();
+  }
+  gbe_mutex.unlock();
+  GBE_DELETE(p);
+}
+#endif
+  }
+
 } /* namespace gbe */
 
 void genSetupCallBacks(void)
@@ -239,4 +331,7 @@ void genSetupCallBacks(void)
   gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
   gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary;
   gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM;
+  gbe_program_new_gen_program = gbe::genProgramNewGenProgram;
+  gbe_program_link_from_llvm = gbe::genProgramLinkFromLLVM;
+  gbe_progr

[Beignet] [PATCH V3 1/3] add [opencl-1.2] API clCompileProgram.

2014-06-05 Thread xionghu . luo
From: Luo 

This API compiles a program's source for all the devices or a specific
device in the OpenCL context associated with program.
The pre-processor runs before the program sources are compiled.

Signed-off-by: Luo 
---
 backend/src/backend/program.cpp | 162 
 backend/src/backend/program.h   |  10 +++
 src/cl_api.c|  41 ++
 src/cl_gbe_loader.cpp   |   4 +
 src/cl_program.c| 109 +++
 src/cl_program.h|   8 +-
 6 files changed, 333 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index fab6bce..7cfc07e 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -842,6 +842,166 @@ namespace gbe {
   }
 #endif
 
+#ifdef GBE_COMPILER_AVAILABLE
+  static gbe_program programCompileFromSource(uint32_t deviceID,
+  const char *source,
+  const char *temp_header_path,
+  size_t stringSize,
+  const char *options,
+  char *err,
+  size_t *errSize)
+  {
+char clStr[] = "/tmp/XX.cl";
+int clFd = mkstemps(clStr, 3);
+const std::string clName = std::string(clStr);
+std::string clOpt;
+
+FILE *clFile = fdopen(clFd, "w");
+FATAL_IF(clFile == NULL, "Failed to open temporary file");
+
+bool usePCH = OCL_USE_PCH;
+bool findPCH = false;
+
+/* Because our header file is so big, we want to avoid recompile the 
header from
+   scratch. We use the PCH support of Clang to save the huge compiling 
time.
+   We just use the most general build opt to build the PCH header file, so 
if
+   user pass new build options here, the PCH can not pass the Clang's 
compitable
+   validating. Clang will do three kinds of compatible check: Language 
Option,
+   Target Option and Preprocessing Option. Other kinds of options such as 
the
+   CodeGen options will not affect the AST result, so no need to check.
+
+   According to OpenCL 1.1's spec, the CL build options:
+   -D name=definition
+   If the definition is not used in our header, it is compitable
+
+   -cl-single-precision-constant
+   -cl-denorms-are-zero
+   -cl-std=
+   Language options, really affect.
+
+   -cl-opt-disable
+   -cl-mad-enable
+   -cl-no-signed-zeros
+   -cl-unsafe-math-optimizations
+   -cl-finite-math-only
+   -cl-fast-relaxed-math
+   CodeGen options, not affect
+
+   -Werror
+   -w
+   Our header should not block the compiling because of warning.
+
+   So we just disable the PCH validation of Clang and do the judgement by 
ourself. */
+
+if(options) {
+  char *p;
+  /* FIXME: Though we can disable the pch valid check, and load pch 
successfully,
+ but these language opts and pre-defined macro will still generate the 
diag msg
+ to the diag engine of the Clang and cause the Clang to report error.
+ We filter them all here to avoid these. */
+  const char * incompatible_opts[] = {
+  "-cl-single-precision-constant",
+//"-cl-denorms-are-zero",
+  "-cl-fast-relaxed-math",
+  "-cl-std=",
+  };
+  const char * incompatible_defs[] = {
+  "GET_FLOAT_WORD",
+  "__NV_CL_C_VERSION",
+  "GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND"
+  };
+
+  for (unsigned int i = 0; i < sizeof(incompatible_opts)/sizeof(char *); 
i++ ) {
+p = strstr(const_cast(options), incompatible_opts[i]);
+if (p) {
+  usePCH = false;
+  break;
+}
+  }
+
+  if (usePCH) {
+for (unsigned int i = 0; i < sizeof(incompatible_defs)/sizeof(char *); 
i++ ) {
+  p = strstr(const_cast(options), incompatible_defs[i]);
+  if (p) {
+usePCH = false;
+break;
+  }
+}
+  }
+
+
+  clOpt += options;
+}
+
+std::string dirs = OCL_PCH_PATH;
+std::istringstream idirs(dirs);
+std::string pchFileName;
+
+while (getline(idirs, pchFileName, ':')) {
+  if(access(pchFileName.c_str(), R_OK) == 0) {
+findPCH = true;
+break;
+  }
+}
+
+if (usePCH && findPCH) {
+  clOpt += " -include-pch ";
+  clOpt += pchFileName;
+  clOpt += " ";
+} else
+  fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 1, 
clFile);
+
+if (!OCL_STRICT_CONFORMANCE) {
+fwrite(ocl_mathfunc_fastpath_str.c_str(), 
strlen(ocl_mathfunc_fastpath_str.c_str()), 1, clFile);
+}
+
+//for clCompilerProgram usage.
+if(temp_header_path){
+  clOpt += " -I ";
+  clOpt += temp_header_path;
+  clOpt += " ";
+}
+
+// reset the file number in case we have in

[Beignet] [PATCH V3 3/3] add [opencl-1.2] test case runtime_compile_link.

2014-06-05 Thread xionghu . luo
From: Luo 

Signed-off-by: Luo 
---
 kernels/multi2.h|   1 +
 kernels/multi_A.cl  |  13 
 kernels/multi_B.cl  |   9 +++
 kernels/mydir/multi3.h  |   4 ++
 utests/CMakeLists.txt   |   1 +
 utests/runtime_compile_link.cpp | 127 
 6 files changed, 155 insertions(+)
 create mode 100644 kernels/multi2.h
 create mode 100644 kernels/multi_A.cl
 create mode 100644 kernels/multi_B.cl
 create mode 100644 kernels/mydir/multi3.h
 create mode 100644 utests/runtime_compile_link.cpp

diff --git a/kernels/multi2.h b/kernels/multi2.h
new file mode 100644
index 000..ae2c56e
--- /dev/null
+++ b/kernels/multi2.h
@@ -0,0 +1 @@
+int comp_long(long x, long y);
diff --git a/kernels/multi_A.cl b/kernels/multi_A.cl
new file mode 100644
index 000..9282b8d
--- /dev/null
+++ b/kernels/multi_A.cl
@@ -0,0 +1,13 @@
+#include "multi2.h"
+#include "mydir/multi3.h"
+
+int comp_long(long x, long y)
+{
+  return x < y ;
+}
+
+kernel void multi_A(global long *src1, global long *src2, global long *dst) {
+  int i = get_global_id(0);
+  int j = comp_long(src1[i], src2[i]);
+  dst[i] = j ? 3 : 4;
+}
diff --git a/kernels/multi_B.cl b/kernels/multi_B.cl
new file mode 100644
index 000..de147eb
--- /dev/null
+++ b/kernels/multi_B.cl
@@ -0,0 +1,9 @@
+#include "multi2.h"
+#include "mydir/multi3.h"
+
+kernel void multi_B(global long *src1, global long *src2, global long *dst) {
+  int i = get_global_id(0);
+  int j = comp_long(src1[i], src2[i]);
+  dst[i] = j ? 3 : 4;
+  int k = greater(src1[i], src2[i]);
+}
diff --git a/kernels/mydir/multi3.h b/kernels/mydir/multi3.h
new file mode 100644
index 000..4011278
--- /dev/null
+++ b/kernels/mydir/multi3.h
@@ -0,0 +1,4 @@
+inline int greater(long x, long y)
+{
+  return x > y ;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 698c9ff..bee3e8f 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -157,6 +157,7 @@ set (utests_sources
   runtime_event.cpp
   runtime_barrier_list.cpp
   runtime_marker_list.cpp
+  runtime_compile_link.cpp
   compiler_double.cpp
   compiler_double_2.cpp
   compiler_double_3.cpp
diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp
new file mode 100644
index 000..17fe413
--- /dev/null
+++ b/utests/runtime_compile_link.cpp
@@ -0,0 +1,127 @@
+#include 
+#include 
+#include 
+#include "utest_helper.hpp"
+#include "utest_file_map.hpp"
+
+#define BUFFERSIZE  32*1024
+
+int init_program(const char* name, cl_context ctx, cl_program *pg )
+{
+  cl_int err;
+  char* ker_path = cl_do_kiss_path(name, device);
+
+  cl_file_map_t *fm = cl_file_map_new();
+  err = cl_file_map_open(fm, ker_path);
+  if(err != CL_FILE_MAP_SUCCESS)
+OCL_ASSERT(0);
+  const char *src = cl_file_map_begin(fm);
+
+  *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err);
+  free(ker_path);
+  cl_file_map_delete(fm);
+  return 0;
+
+}
+
+void runtime_compile_link(void)
+{
+
+  cl_int err;
+
+  const char* header_file_name="multi2.h";
+  cl_program foo_pg;
+  init_program(header_file_name, ctx, &foo_pg);
+
+  const char* myinc_file_name="mydir/multi3.h";
+  cl_program myinc_pg;
+  init_program(myinc_file_name, ctx, &myinc_pg);
+
+  const char* file_name_A="multi_A.cl";
+  cl_program program_A;
+  init_program(file_name_A, ctx, &program_A);
+
+  cl_program input_headers[2] = { foo_pg, myinc_pg};
+  const char * input_header_names[2] = { "multi2.h", "mydir/multi3.h"};
+
+  err = clCompileProgram(program_A,
+0, NULL, // num_devices & device_list
+NULL, // compile_options
+2, // num_input_headers
+input_headers,
+input_header_names,
+NULL, NULL);
+
+  OCL_ASSERT(err==CL_SUCCESS);
+  const char* file_name_B="multi_B.cl";
+  cl_program program_B;
+  init_program(file_name_B, ctx, &program_B);
+
+  err = clCompileProgram(program_B,
+0, NULL, // num_devices & device_list
+NULL, // compile_options
+2, // num_input_headers
+input_headers,
+input_header_names,
+NULL, NULL);
+
+  OCL_ASSERT(err==CL_SUCCESS);
+  cl_program input_programs[2] = { program_A, program_B};
+  cl_program linked_program = clLinkProgram(ctx, 0, NULL, NULL, 2, 
input_programs, NULL, NULL, &err);
+
+
+  OCL_ASSERT(linked_program != NULL);
+  OCL_ASSERT(err == CL_SUCCESS);
+
+  // link success, run this kernel.
+
+  const size_t n = 16;
+  int64_t src1[n], src2[n];
+
+  src1[0] = (int64_t)1 << 63, src2[0] = 0x7FFFll;
+  src1[1] = (int64_t)1 << 63, src2[1] = ((int64_t)1 << 63) | 1;
+  src1[2] = -1ll, src2[2] = 0;
+  src1[3] = ((int64_t)123 << 32) | 0x7FFF, src2[3] = ((int64_t)123 << 32) 
| 0x8

[Beignet] [PATCH V2 0/3] opencl-1.2 compile/link implementation.

2014-06-05 Thread xionghu . luo
From: Luo 

1. This patchset depends on the patch from master titled "remove the code of
saving the llvm bitcode to file, replace it with llvm::Module".
2. The clBuildProgram path is independent of clCompileProgram/clLinkProgram:
clBuildProgram allocates a new LLVMContext, while compile/link use the global
LLVMContext.

Luo (3):
  add [opencl-1.2] API clCompileProgram.
  add [opencl-1.2] API clLinkProgram.
  add [opencl-1.2] test case runtime_cl.

 backend/src/backend/gen_program.cpp | 120 
 backend/src/backend/program.cpp | 178 
 backend/src/backend/program.h   |  38 
 kernels/multi2.h|   1 +
 kernels/multi_A.cl  |  13 +++
 kernels/multi_B.cl  |   9 ++
 kernels/mydir/multi3.h  |   4 +
 src/cl_api.c|  74 +++
 src/cl_gbe_loader.cpp   |  16 
 src/cl_program.c| 138 
 src/cl_program.h|  15 ++-
 utests/CMakeLists.txt   |   1 +
 utests/runtime_compile_link.cpp | 127 +
 13 files changed, 733 insertions(+), 1 deletion(-)
 create mode 100644 kernels/multi2.h
 create mode 100644 kernels/multi_A.cl
 create mode 100644 kernels/multi_B.cl
 create mode 100644 kernels/mydir/multi3.h
 create mode 100644 utests/runtime_compile_link.cpp

-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] add [opencl-1.2] clUnloadPlatformCompiler implementation

2014-06-05 Thread xionghu . luo
From: Luo 

Just an empty hook.

Signed-off-by: Luo 
---
 src/cl_api.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/cl_api.c b/src/cl_api.c
index 8598088..824a952 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -939,6 +939,12 @@ clUnloadCompiler(void)
 }
 
 cl_int
+clUnloadPlatformCompiler(cl_platform_id platform)
+{
+  return CL_SUCCESS;
+}
+
+cl_int
 clGetProgramInfo(cl_program   program,
  cl_program_info  param_name,
  size_t   param_value_size,
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V2] add [opencl-1.2] API clCreateSubDevice.

2014-06-05 Thread xionghu . luo
From: Luo 

Creates an array of sub-devices that each reference a non-intersecting
set of compute units within in_device, according to a partition scheme
given by properties.
---
 src/cl_api.c   | 10 --
 src/cl_device_id.c |  6 ++
 src/cl_device_id.h |  7 +++
 src/cl_gt_device.h |  7 ++-
 4 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 8598088..8264970 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -242,8 +242,14 @@ clCreateSubDevices(cl_device_id 
in_device,
cl_device_id *   out_devices,
cl_uint *num_devices_ret)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  /* Check parameter consistency */
+  if (UNLIKELY(out_devices == NULL && num_devices_ret == NULL))
+return CL_INVALID_VALUE;
+  if (UNLIKELY(in_device == NULL && properties == NULL))
+return CL_INVALID_VALUE;
+
+  *num_devices_ret = 0;
+  return CL_INVALID_DEVICE_PARTITION_COUNT;
 }
 
 cl_int
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 8ec7741..df37519 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -393,6 +393,12 @@ cl_get_device_info(cl_device_id device,
 DECL_STRING_FIELD(OPENCL_C_VERSION, opencl_c_version)
 DECL_STRING_FIELD(EXTENSIONS, extensions);
 DECL_STRING_FIELD(BUILT_IN_KERNELS, built_in_kernels)
+DECL_FIELD(PARENT_DEVICE, parent_device)
+DECL_FIELD(PARTITION_MAX_SUB_DEVICES, partition_max_sub_device)
+DECL_FIELD(PARTITION_PROPERTIES, partition_property)
+DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain)
+DECL_FIELD(PARTITION_TYPE, partition_type)
+DECL_FIELD(REFERENCE_COUNT, device_reference_count)
 
 case CL_DRIVER_VERSION:
   if (param_value_size_ret) {
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 2bbe98e..a5449a7 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -98,6 +98,13 @@ struct _cl_device_id {
   /* Kernel specific info that we're assigning statically */
   size_t wg_sz;
   size_t preferred_wg_sz_mul;
+  /* SubDevice specific info */
+  cl_device_id parent_device;
+  cl_uint  partition_max_sub_device;
+  cl_device_partition_property partition_property[3];
+  cl_device_affinity_domainaffinity_domain;
+  cl_device_partition_property partition_type[3];
+  cl_uint  device_reference_count;
 };
 
 /* Get a device from the given platform */
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index cab2c58..b8bda5e 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -101,5 +101,10 @@ DECL_INFO_STRING(built_in_kernels, 
"__cl_copy_region_align4;"
 
 DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
 #undef DECL_INFO_STRING
-
+.parent_device = NULL,
+.partition_max_sub_device = 1,
+.partition_property = {0},
+.affinity_domain = 0,
+.partition_type = {0},
+.device_reference_count = 1,
 
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V3] remove the code of saving the llvm bitcode to file, replace it with llvm::Module

2014-06-06 Thread xionghu . luo
From: Luo 

Save the global LLVMContext and module pointer in GenProgram, and delete the
module pointer in the destructor.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 33 +++--
 backend/src/backend/gen_program.hpp |  4 +-
 backend/src/backend/program.cpp | 95 -
 backend/src/backend/program.h   |  3 +-
 backend/src/backend/program.hpp |  2 +-
 backend/src/llvm/llvm_to_gen.cpp| 16 ---
 backend/src/llvm/llvm_to_gen.hpp|  2 +-
 src/cl_program.c|  2 +-
 8 files changed, 100 insertions(+), 57 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index d2e95d4..33f2ed6 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -22,6 +22,17 @@
  * \author Benjamin Segovia 
  */
 
+#include "llvm/Config/config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/DataLayout.h"
+#else
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/DataLayout.h"
+#endif  /* LLVM_VERSION_MINOR <= 2 */
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -33,6 +44,8 @@
 #include "ir/unit.hpp"
 #include "llvm/llvm_to_gen.hpp"
 
+#include 
+
 #include 
 #include 
 #include 
@@ -74,7 +87,19 @@ namespace gbe {
 #endif
   }
 
-  GenProgram::~GenProgram(void) {}
+  GenProgram::~GenProgram(void){
+#ifdef GBE_COMPILER_AVAILABLE
+if(module){
+  delete (llvm::Module*)module;
+  module = NULL;
+}
+
+if(llvm_ctx){
+  delete (llvm::LLVMContext*)llvm_ctx;
+  llvm_ctx = NULL;
+}
+#endif
+  }
 
   /*! We must avoid spilling at all cost with Gen */
   static const struct CodeGenStrategy {
@@ -182,17 +207,19 @@ namespace gbe {
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
const char *fileName,
+   const void* module,
+   const void* llvm_ctx,
size_t stringSize,
char *err,
size_t *errSize,
int optLevel)
   {
 using namespace gbe;
-GenProgram *program = GBE_NEW(GenProgram, deviceID);
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
 #ifdef GBE_COMPILER_AVAILABLE
 std::string error;
 // Try to compile the program
-if (program->buildFromLLVMFile(fileName, error, optLevel) == false) {
+if (program->buildFromLLVMFile(fileName, module, error, optLevel) == 
false) {
   if (err != NULL && errSize != NULL && stringSize > 0u) {
 const size_t msgSize = std::min(error.size(), stringSize-1u);
 std::memcpy(err, error.c_str(), msgSize);
diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ea54b49..70794c9 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -58,7 +58,7 @@ namespace gbe
   {
   public:
 /*! Create an empty program */
-GenProgram(uint32_t deviceID) : deviceID(deviceID) {}
+GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {}
 /*! Current device ID*/
 uint32_t deviceID;
 /*! Destroy the program */
@@ -69,6 +69,8 @@ namespace gbe
 virtual Kernel *allocateKernel(const std::string &name) {
   return GBE_NEW(GenKernel, name);
 }
+void* module;
+void* llvm_ctx;
 /*! Use custom allocators */
 GBE_CLASS(GenProgram);
   };
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 949aeb4..6e50761 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -34,6 +34,8 @@
 #include "llvm/Config/config.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/IR/LLVMContext.h"
 #include 
 #include 
 #include 
@@ -103,9 +105,13 @@ namespace gbe {
 #ifdef GBE_COMPILER_AVAILABLE
   BVAR(OCL_OUTPUT_GEN_IR, false);
 
-  bool Program::buildFromLLVMFile(const char *fileName, std::string &error, 
int optLevel) {
+  bool Program::buildFromLLVMFile(const char *fileName, const void* module, 
std::string &error, int optLevel) {
 ir::Unit *unit = new ir::Unit();
-if (llvmToGen(*unit, fileName, optLevel) == false) {
+llvm::Module * cloned_module = NULL;
+if(module){
+  cloned_module = llvm::CloneModule((llvm::Module*)module);
+}
+if (llvmToGen(*unit, fileName, module, optLevel) == false) {
   error = std::string(fileName) + " not found";
   return false;
 }
@@ -114,11 +120,18 @@ namespace gbe {
 if(!unit->getValid()) {
 

[Beignet] [PATCH V2 0/3] opencl-1.2 compile/link implementation.

2014-06-06 Thread xionghu . luo
From: Luo 

1. This patchset depends on the patch from master titled "remove the code of
saving the llvm bitcode to file, replace it with llvm::Module".
2. The clBuildProgram path is independent of clCompileProgram/clLinkProgram:
clBuildProgram allocates a new LLVMContext, while compile/link use the global
LLVMContext.

Luo (3):
  add [opencl-1.2] API clCompileProgram.
  add [opencl-1.2] API clLinkProgram.
  add [opencl-1.2] test case runtime_cl.

 backend/src/backend/gen_program.cpp | 120 
 backend/src/backend/program.cpp | 178 
 backend/src/backend/program.h   |  38 
 kernels/multi2.h|   1 +
 kernels/multi_A.cl  |  13 +++
 kernels/multi_B.cl  |   9 ++
 kernels/mydir/multi3.h  |   4 +
 src/cl_api.c|  74 +++
 src/cl_gbe_loader.cpp   |  16 
 src/cl_program.c| 138 
 src/cl_program.h|  15 ++-
 utests/CMakeLists.txt   |   1 +
 utests/runtime_compile_link.cpp | 127 +
 13 files changed, 733 insertions(+), 1 deletion(-)
 create mode 100644 kernels/multi2.h
 create mode 100644 kernels/multi_A.cl
 create mode 100644 kernels/multi_B.cl
 create mode 100644 kernels/mydir/multi3.h
 create mode 100644 utests/runtime_compile_link.cpp

-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH v4 1/3] add [opencl-1.2] API clCompileProgram.

2014-06-06 Thread xionghu . luo
From: Luo 

This API compiles a program's source for all the devices or a specific
device in the OpenCL context associated with the program.
The pre-processor runs before the program sources are compiled.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp |  9 
 backend/src/backend/program.cpp | 60 +++
 backend/src/backend/program.h   | 20 
 src/cl_api.c| 41 
 src/cl_gbe_loader.cpp   |  4 ++
 src/cl_program.c| 96 +
 src/cl_program.h|  8 +++-
 7 files changed, 237 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 33f2ed6..1d19289 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -232,6 +232,14 @@ namespace gbe {
 // Everything run fine
 return (gbe_program) program;
   }
+
+  static gbe_program genProgramNewGenProgram(uint32_t deviceID, const 
+  void* module, const void* llvm_ctx)  {
+using namespace gbe;
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
+// Everything run fine
+return (gbe_program) program;
+  }
 } /* namespace gbe */
 
 void genSetupCallBacks(void)
@@ -239,4 +247,5 @@ void genSetupCallBacks(void)
   gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
   gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary;
   gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM;
+  gbe_program_new_gen_program = gbe::genProgramNewGenProgram;
 }
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index d23529a..121153d 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -861,6 +861,52 @@ namespace gbe {
   }
 #endif
 
+#ifdef GBE_COMPILER_AVAILABLE
+
+  static gbe_program programCompileFromSource(uint32_t deviceID,
+  const char *source,
+  const char *temp_header_path,
+  size_t stringSize,
+  const char *options,
+  char *err,
+  size_t *errSize)
+  {
+int optLevel = 1;
+std::string clOpt;
+std::string clName;
+processSourceAndOption(source, options, temp_header_path, clOpt, clName, 
optLevel);
+
+gbe_program p;
+acquireLLVMContextLock();
+//FIXME: if use new allocated context to link two modules there would be 
context mismatch
+//for some functions, so we use global context now, need switch to new 
context later.
+llvm::Module * out_module;
+llvm::LLVMContext* llvm_ctx = &llvm::getGlobalContext();
+if (buildModuleFromSource(clName.c_str(), &out_module, llvm_ctx, 
clOpt.c_str(),
+  stringSize, err, errSize)) {
+// Now build the program from llvm
+  size_t clangErrSize = 0;
+  if (err != NULL) {
+GBE_ASSERT(errSize != NULL);
+stringSize -= *errSize;
+err += *errSize;
+clangErrSize = *errSize;
+  }
+
+  p = gbe_program_new_gen_program(deviceID, out_module, NULL);
+
+  if (err != NULL)
+*errSize += clangErrSize;
+  if (OCL_OUTPUT_BUILD_LOG && options)
+llvm::errs() << options;
+} else
+  p = NULL;
+remove(clName.c_str());
+releaseLLVMContextLock();
+return p;
+  }
+#endif
+
   static size_t programGetGlobalConstantSize(gbe_program gbeProgram) {
 if (gbeProgram == NULL) return 0;
 const gbe::Program *program = (const gbe::Program*) gbeProgram;
@@ -1024,10 +1070,23 @@ namespace gbe {
   }
 } /* namespace gbe */
 
+std::mutex llvm_ctx_mutex;
+void acquireLLVMContextLock()
+{
+  llvm_ctx_mutex.lock();
+}
+
+void releaseLLVMContextLock()
+{
+  llvm_ctx_mutex.unlock();
+}
+
 GBE_EXPORT_SYMBOL gbe_program_new_from_source_cb *gbe_program_new_from_source 
= NULL;
+GBE_EXPORT_SYMBOL gbe_program_compile_from_source_cb 
*gbe_program_compile_from_source = NULL;
 GBE_EXPORT_SYMBOL gbe_program_new_from_binary_cb *gbe_program_new_from_binary 
= NULL;
 GBE_EXPORT_SYMBOL gbe_program_serialize_to_binary_cb 
*gbe_program_serialize_to_binary = NULL;
 GBE_EXPORT_SYMBOL gbe_program_new_from_llvm_cb *gbe_program_new_from_llvm = 
NULL;
+GBE_EXPORT_SYMBOL gbe_program_new_gen_program_cb *gbe_program_new_gen_program 
= NULL; 
 GBE_EXPORT_SYMBOL gbe_program_get_global_constant_size_cb 
*gbe_program_get_global_constant_size = NULL;
 GBE_EXPORT_SYMBOL gbe_program_get_global_constant_data_cb 
*gbe_program_get_global_constant_data = NULL;
 GBE_EXPORT_SYMBOL gbe_program_delete_cb *gbe_program_delete = NULL;
@@ -1067,6 +1126,7 @@ namespace gbe
   {
 CallBackInitializer(void) {
   gbe_program_new_from_source = gbe::programNewFromSource;
+  gbe_program_compile_from_source = gbe::programCompileFromSource

[Beignet] [PATCH v4 3/3] add [opencl-1.2] test case runtime_compile_link.

2014-06-06 Thread xionghu . luo
From: Luo 

Signed-off-by: Luo 
---
 kernels/multi2.h|   1 +
 kernels/multi_A.cl  |  13 
 kernels/multi_B.cl  |   9 +++
 kernels/mydir/multi3.h  |   4 ++
 utests/CMakeLists.txt   |   1 +
 utests/runtime_compile_link.cpp | 127 
 6 files changed, 155 insertions(+)
 create mode 100644 kernels/multi2.h
 create mode 100644 kernels/multi_A.cl
 create mode 100644 kernels/multi_B.cl
 create mode 100644 kernels/mydir/multi3.h
 create mode 100644 utests/runtime_compile_link.cpp

diff --git a/kernels/multi2.h b/kernels/multi2.h
new file mode 100644
index 000..ae2c56e
--- /dev/null
+++ b/kernels/multi2.h
@@ -0,0 +1 @@
+int comp_long(long x, long y);
diff --git a/kernels/multi_A.cl b/kernels/multi_A.cl
new file mode 100644
index 000..9282b8d
--- /dev/null
+++ b/kernels/multi_A.cl
@@ -0,0 +1,13 @@
+#include "multi2.h"
+#include "mydir/multi3.h"
+
+int comp_long(long x, long y)
+{
+  return x < y ;
+}
+
+kernel void multi_A(global long *src1, global long *src2, global long *dst) {
+  int i = get_global_id(0);
+  int j = comp_long(src1[i], src2[i]);
+  dst[i] = j ? 3 : 4;
+}
diff --git a/kernels/multi_B.cl b/kernels/multi_B.cl
new file mode 100644
index 000..de147eb
--- /dev/null
+++ b/kernels/multi_B.cl
@@ -0,0 +1,9 @@
+#include "multi2.h"
+#include "mydir/multi3.h"
+
+kernel void multi_B(global long *src1, global long *src2, global long *dst) {
+  int i = get_global_id(0);
+  int j = comp_long(src1[i], src2[i]);
+  dst[i] = j ? 3 : 4;
+  int k = greater(src1[i], src2[i]);
+}
diff --git a/kernels/mydir/multi3.h b/kernels/mydir/multi3.h
new file mode 100644
index 000..4011278
--- /dev/null
+++ b/kernels/mydir/multi3.h
@@ -0,0 +1,4 @@
+inline int greater(long x, long y)
+{
+  return x > y ;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 698c9ff..bee3e8f 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -157,6 +157,7 @@ set (utests_sources
   runtime_event.cpp
   runtime_barrier_list.cpp
   runtime_marker_list.cpp
+  runtime_compile_link.cpp
   compiler_double.cpp
   compiler_double_2.cpp
   compiler_double_3.cpp
diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp
new file mode 100644
index 000..17fe413
--- /dev/null
+++ b/utests/runtime_compile_link.cpp
@@ -0,0 +1,127 @@
+#include 
+#include 
+#include 
+#include "utest_helper.hpp"
+#include "utest_file_map.hpp"
+
+#define BUFFERSIZE  32*1024
+
+int init_program(const char* name, cl_context ctx, cl_program *pg )
+{
+  cl_int err;
+  char* ker_path = cl_do_kiss_path(name, device);
+
+  cl_file_map_t *fm = cl_file_map_new();
+  err = cl_file_map_open(fm, ker_path);
+  if(err != CL_FILE_MAP_SUCCESS)
+OCL_ASSERT(0);
+  const char *src = cl_file_map_begin(fm);
+
+  *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err);
+  free(ker_path);
+  cl_file_map_delete(fm);
+  return 0;
+
+}
+
+void runtime_compile_link(void)
+{
+
+  cl_int err;
+
+  const char* header_file_name="multi2.h";
+  cl_program foo_pg;
+  init_program(header_file_name, ctx, &foo_pg);
+
+  const char* myinc_file_name="mydir/multi3.h";
+  cl_program myinc_pg;
+  init_program(myinc_file_name, ctx, &myinc_pg);
+
+  const char* file_name_A="multi_A.cl";
+  cl_program program_A;
+  init_program(file_name_A, ctx, &program_A);
+
+  cl_program input_headers[2] = { foo_pg, myinc_pg};
+  const char * input_header_names[2] = { "multi2.h", "mydir/multi3.h"};
+
+  err = clCompileProgram(program_A,
+0, NULL, // num_devices & device_list
+NULL, // compile_options
+2, // num_input_headers
+input_headers,
+input_header_names,
+NULL, NULL);
+
+  OCL_ASSERT(err==CL_SUCCESS);
+  const char* file_name_B="multi_B.cl";
+  cl_program program_B;
+  init_program(file_name_B, ctx, &program_B);
+
+  err = clCompileProgram(program_B,
+0, NULL, // num_devices & device_list
+NULL, // compile_options
+2, // num_input_headers
+input_headers,
+input_header_names,
+NULL, NULL);
+
+  OCL_ASSERT(err==CL_SUCCESS);
+  cl_program input_programs[2] = { program_A, program_B};
+  cl_program linked_program = clLinkProgram(ctx, 0, NULL, NULL, 2, 
input_programs, NULL, NULL, &err);
+
+
+  OCL_ASSERT(linked_program != NULL);
+  OCL_ASSERT(err == CL_SUCCESS);
+
+  // link success, run this kernel.
+
+  const size_t n = 16;
+  int64_t src1[n], src2[n];
+
+  src1[0] = (int64_t)1 << 63, src2[0] = 0x7FFFll;
+  src1[1] = (int64_t)1 << 63, src2[1] = ((int64_t)1 << 63) | 1;
+  src1[2] = -1ll, src2[2] = 0;
+  src1[3] = ((int64_t)123 << 32) | 0x7FFF, src2[3] = ((int64_t)123 << 32) 
| 0x8

[Beignet] [PATCH v4 2/3] add [opencl-1.2] API clLinkProgram.

2014-06-06 Thread xionghu . luo
From: Luo 

This API links a set of compiled program objects and libraries for all
the devices or specific device(s) in the OpenCL context and creates
an executable.
The LLVM bitcode in the compiled program objects is linked together and
built into a Gen binary.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 88 -
 backend/src/backend/program.cpp | 28 +---
 backend/src/backend/program.h   | 28 
 src/cl_api.c| 33 ++
 src/cl_gbe_loader.cpp   | 12 +
 src/cl_program.c| 49 +
 src/cl_program.h|  7 +++
 7 files changed, 238 insertions(+), 7 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 1d19289..ad9043b 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -33,6 +33,9 @@
 #include "llvm/IR/DataLayout.h"
 #endif  /* LLVM_VERSION_MINOR <= 2 */
 
+#include "llvm/Linker.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
 #include "backend/program.h"
 #include "backend/gen_program.h"
 #include "backend/gen_program.hpp"
@@ -51,6 +54,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -233,13 +237,91 @@ namespace gbe {
 return (gbe_program) program;
   }
 
-  static gbe_program genProgramNewGenProgram(uint32_t deviceID, const 
-  void* module, const void* llvm_ctx)  {
+  static gbe_program genProgramNewGenProgram(uint32_t deviceID, const void* 
module, const void* llvm_ctx)
+  {
 using namespace gbe;
 GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
 // Everything run fine
 return (gbe_program) program;
   }
+
+  static void genProgramLinkFromLLVM(gbe_program   dst_program,
+ gbe_program   src_program,
+ size_tstringSize,
+ char *err,
+ size_t *  errSize)
+  {
+#ifdef GBE_COMPILER_AVAILABLE
+using namespace gbe;
+std::string errMsg;
+if(((GenProgram*)dst_program)->module == NULL){
+  ((GenProgram*)dst_program)->module = 
llvm::CloneModule((llvm::Module*)((GenProgram*)src_program)->module);
+  errSize = 0;
+}else{
+  //set the global variables and functions to link once to fix redefine.
+  llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module;
+  for (llvm::Module::global_iterator I = src->global_begin(), E = 
src->global_end(); I != E; ++I) {
+I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  }
+
+  for (llvm::Module::iterator I = src->begin(), E = src->end(); I != E; 
++I) {
+I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+  }
+
+  llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module;
+  llvm::Linker::LinkModules( dst,
+ src,
+ llvm::Linker::PreserveSource,
+ &errMsg);
+  if (errMsg.c_str() != NULL) {
+if (err != NULL && errSize != NULL && stringSize > 0u) {
+  if(errMsg.length() < stringSize )
+stringSize = errMsg.length();
+  strcpy(err, errMsg.c_str());
+  err[stringSize+1] = '\0';
+}
+  }
+}
+// Everything run fine
+#endif
+  }
+
+  static void genProgramBuildFromLLVM(gbe_program program,
+  size_t stringSize,
+  char *err,
+  size_t *errSize,
+  const char *  options)
+  {
+#ifdef GBE_COMPILER_AVAILABLE
+using namespace gbe;
+std::string error;
+
+int optLevel = 1;
+
+if(options) {
+  char *p;
+  p = strstr(const_cast(options), "-cl-opt-disable");
+  if (p)
+optLevel = 0;
+}
+
+GenProgram* p = (GenProgram*) program;
+// Try to compile the program
+acquireLLVMContextLock();
+llvm::Module* module = (llvm::Module*)p->module;
+
+if (p->buildFromLLVMFile(NULL, module, error, optLevel) == false) {
+  if (err != NULL && errSize != NULL && stringSize > 0u) {
+const size_t msgSize = std::min(error.size(), stringSize-1u);
+std::memcpy(err, error.c_str(), msgSize);
+*errSize = error.size();
+  }
+  GBE_DELETE(p);
+}
+releaseLLVMContextLock();
+#endif
+  }
+
 } /* namespace gbe */
 
 void genSetupCallBacks(void)
@@ -248,4 +330,6 @@ void genSetupCallBacks(void)
   gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary;
   gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM;
   gbe_program_new_gen_program = gbe::genProgramNewGenProgram;
+  gbe_program_link_from_llvm = gbe::genProgramLinkFromLLVM;
+  gbe_program_build_from_llvm = gbe::

[Beignet] [PATCH v5 3/3] add [opencl-1.2] test case runtime_compile_link.

2014-06-09 Thread xionghu . luo
From: Luo 

Signed-off-by: Luo 
---
 kernels/include/runtime_compile_link_inc.h |   4 +
 kernels/runtime_compile_link.h |   1 +
 kernels/runtime_compile_link_a.cl  |  13 +++
 kernels/runtime_compile_link_b.cl  |   9 ++
 utests/CMakeLists.txt  |   1 +
 utests/runtime_compile_link.cpp| 127 +
 6 files changed, 155 insertions(+)
 create mode 100644 kernels/include/runtime_compile_link_inc.h
 create mode 100644 kernels/runtime_compile_link.h
 create mode 100644 kernels/runtime_compile_link_a.cl
 create mode 100644 kernels/runtime_compile_link_b.cl
 create mode 100644 utests/runtime_compile_link.cpp

diff --git a/kernels/include/runtime_compile_link_inc.h 
b/kernels/include/runtime_compile_link_inc.h
new file mode 100644
index 0000000..4011278
--- /dev/null
+++ b/kernels/include/runtime_compile_link_inc.h
@@ -0,0 +1,4 @@
+inline int greater(long x, long y) /* 1 when x is strictly greater than y, else 0 */
+{
+  return y < x;
+}
diff --git a/kernels/runtime_compile_link.h b/kernels/runtime_compile_link.h
new file mode 100644
index 0000000..ae2c56e
--- /dev/null
+++ b/kernels/runtime_compile_link.h
@@ -0,0 +1 @@
+int comp_long(long x, long y);
diff --git a/kernels/runtime_compile_link_a.cl 
b/kernels/runtime_compile_link_a.cl
new file mode 100644
index 0000000..b17861f
--- /dev/null
+++ b/kernels/runtime_compile_link_a.cl
@@ -0,0 +1,13 @@
+#include "runtime_compile_link.h"
+#include "include/runtime_compile_link_inc.h"
+
+int comp_long(long x, long y) /* 1 when x is strictly less than y, else 0 */
+{
+  return y > x;
+}
+
+/* Per work-item: dst receives 3 when comp_long(src1, src2) is non-zero, 4 otherwise. */
+kernel void runtime_compile_link_a(global long *src1, global long *src2, global long *dst) {
+  const int gid = get_global_id(0);
+  if (comp_long(src1[gid], src2[gid]))
+    dst[gid] = 3;
+  else
+    dst[gid] = 4;
+}
diff --git a/kernels/runtime_compile_link_b.cl 
b/kernels/runtime_compile_link_b.cl
new file mode 100644
index 0000000..89b5a2d
--- /dev/null
+++ b/kernels/runtime_compile_link_b.cl
@@ -0,0 +1,9 @@
+#include "runtime_compile_link.h"
+#include "include/runtime_compile_link_inc.h"
+
+/* Per work-item: dst receives 3 when comp_long(src1, src2) is non-zero, 4 otherwise. */
+kernel void runtime_compile_link_b(global long *src1, global long *src2, global long *dst) {
+  const int gid = get_global_id(0);
+  if (comp_long(src1[gid], src2[gid]))
+    dst[gid] = 3;
+  else
+    dst[gid] = 4;
+  /* The result is never read; the call only references greater() from the included header. */
+  int unused = greater(src1[gid], src2[gid]);
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 698c9ff..bee3e8f 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -157,6 +157,7 @@ set (utests_sources
   runtime_event.cpp
   runtime_barrier_list.cpp
   runtime_marker_list.cpp
+  runtime_compile_link.cpp
   compiler_double.cpp
   compiler_double_2.cpp
   compiler_double_3.cpp
diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp
new file mode 100644
index 0000000..8aeea31
--- /dev/null
+++ b/utests/runtime_compile_link.cpp
@@ -0,0 +1,127 @@
+#include 
+#include 
+#include 
+#include "utest_helper.hpp"
+#include "utest_file_map.hpp"
+
+#define BUFFERSIZE  32*1024
+
+// Create a program object from the kernel source file `name`.
+//
+//   name - file name handed to cl_do_kiss_path() to build the full path
+//   ctx  - context the program is created in
+//   pg   - receives the program returned by clCreateProgramWithSource()
+//
+// Asserts when the file cannot be mapped or the program cannot be created;
+// otherwise returns 0.
+int init_program(const char* name, cl_context ctx, cl_program *pg )
+{
+  cl_int err;
+  char* ker_path = cl_do_kiss_path(name, device);
+
+  cl_file_map_t *fm = cl_file_map_new();
+  err = cl_file_map_open(fm, ker_path);
+  if(err != CL_FILE_MAP_SUCCESS)
+    OCL_ASSERT(0);
+  const char *src = cl_file_map_begin(fm);
+
+  *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err);
+  // Release the path and the mapping before checking the status.
+  free(ker_path);
+  cl_file_map_delete(fm);
+  // The creation status used to be silently dropped, which let later
+  // compile/link calls run on an invalid program.
+  OCL_ASSERT(err == CL_SUCCESS);
+  return 0;
+}
+
+void runtime_compile_link(void)
+{
+
+  cl_int err;
+
+  const char* header_file_name="runtime_compile_link.h";
+  cl_program foo_pg;
+  init_program(header_file_name, ctx, &foo_pg);
+
+  const char* myinc_file_name="include/runtime_compile_link_inc.h";
+  cl_program myinc_pg;
+  init_program(myinc_file_name, ctx, &myinc_pg);
+
+  const char* file_name_A="runtime_compile_link_a.cl";
+  cl_program program_A;
+  init_program(file_name_A, ctx, &program_A);
+
+  cl_program input_headers[2] = { foo_pg, myinc_pg};
+  const char * input_header_names[2] = {header_file_name, myinc_file_name}; 
+
+  err = clCompileProgram(program_A,
+0, NULL, // num_devices & device_list
+NULL, // compile_options
+2, // num_input_headers
+input_headers,
+input_header_names,
+NULL, NULL);
+
+  OCL_ASSERT(err==CL_SUCCESS);
+  const char* file_name_B="runtime_compile_link_b.cl";
+  cl_program program_B;
+  init_program(file_name_B, ctx, &program_B);
+
+  err = clCompileProgram(program_B,
+0, NULL, // num_devices & device_list
+NULL, // compile_options
+2, // num_input_headers
+input_headers,
+input_header_names,
+NULL, NULL);
+
+  OCL_ASSERT(err==CL_SUCCESS);
+  cl_program input_programs[2] = { program_A, program_B};
+  cl_program linked_program = clLinkProgr

[Beignet] [PATCH] [opencl-1.2] add binary type support for compiled object and library.

2014-06-12 Thread xionghu . luo
From: Luo 

save the llvm bitcode to program->binary: insert a byte in front of the
bitcode that stands for the binary type (1 means COMPILED_OBJECT, 2 means LIBRARY);
load the binary to module by ParseIR.

create random directory to save compile header files.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 77 +
 backend/src/backend/program.cpp |  1 +
 backend/src/backend/program.h   |  8 +++-
 src/cl_api.c| 25 ++--
 src/cl_gbe_loader.cpp   |  4 ++
 src/cl_khr_icd.c|  4 +-
 src/cl_program.c| 67 ++--
 src/cl_program.h|  1 +
 8 files changed, 170 insertions(+), 17 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index ad9043b..5324b8c 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -35,6 +35,12 @@
 
 #include "llvm/Linker.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/IRReader/IRReader.h"
 
 #include "backend/program.h"
 #include "backend/gen_program.h"
@@ -55,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -193,20 +200,75 @@ namespace gbe {
 return reinterpret_cast(program);
   }
 
-  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary) {
+  // Rebuild a GenProgram from a serialized LLVM binary blob.
+  //
+  // Expected layout of `binary`: one leading byte holding the binary type,
+  // followed by the LLVM payload that is handed to ParseIR.
+  //
+  //   deviceID - device the new program is created for
+  //   binary   - serialized blob (type byte + payload)
+  //   size     - total size of `binary` in bytes
+  //
+  // Returns the new program, or NULL when the blob is too short, cannot be
+  // parsed, or GBE_COMPILER_AVAILABLE is not defined.
+  static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) {
+#ifdef GBE_COMPILER_AVAILABLE
+    using namespace gbe;
+    // A usable blob needs the type byte plus some payload. Without this
+    // guard size-1 wraps around when size is 0.
+    if (binary == NULL || size < 2)
+      return NULL;
+
+    std::string binary_content;
+    //the first byte stands for binary_type.
+    binary_content.assign(binary+1, size-1);
+    llvm::StringRef llvm_bin_str(binary_content);
+    llvm::LLVMContext& c = llvm::getGlobalContext();
+    llvm::SMDiagnostic Err;
+    llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+    llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c);
+    // Never wrap a module that failed to parse.
+    if (module == NULL)
+      return NULL;
+
+    GenProgram *program = GBE_NEW(GenProgram, deviceID, module);
+
+    //program->printStatus(0, std::cout);
+    return reinterpret_cast<gbe_program>(program);
+#else
+    return NULL;
+#endif
+  }
+
+  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary, int binary_type) { // Serializes `program` into a malloc'ed buffer stored in *binary; returns the buffer size in bytes.
 using namespace gbe;
 size_t sz;
 std::ostringstream oss;
 GenProgram *prog = (GenProgram*)program;
 
-if ((sz = prog->serializeToBin(oss)) == 0) {
-  *binary = 0;
+//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM 
bitcode library
+if(binary_type == 0){ // GEN binary: raw output of serializeToBin(), no type prefix
+  if ((sz = prog->serializeToBin(oss)) == 0) {
+*binary = 0;
+return 0; // nothing was serialized
+  }
+
+  *binary = (char *)malloc(sizeof(char) * sz);
+  memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
+  return sz;
+}else{
+#ifdef GBE_COMPILER_AVAILABLE
+  char llStr[] = "/tmp/XX.ll"; // NOTE(review): mkstemps() expects six X's before the 3-char suffix; the template looks truncated - confirm against the original patch
+  int llFd = mkstemps(llStr, 3);
+  close(llFd); // only the generated file name is used; the file is reopened by name below
+  const std::string llName = std::string(llStr);
+  std::string errorInfo;
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR > 3)
+  auto mode = llvm::sys::fs::F_Binary;
+#else
+  auto mode = llvm::raw_fd_ostream::F_Binary;
+#endif
+  llvm::raw_fd_ostream OS(llName.c_str(), errorInfo, mode);
+  llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
+  OS.close();
+  FILE* pfile = fopen(llName.c_str(), "rb"); // NOTE(review): the fopen, ftell and malloc results below are used unchecked
+  fseek(pfile, 0, SEEK_END);
+  int llsz = ftell(pfile); // size of the bitcode file just written
+  rewind(pfile);
+  *binary = (char *)malloc(sizeof(char) * (llsz+1) ); // one extra byte for the binary type
+  int result = fread(*binary+1, 1, llsz, pfile); // bitcode is placed after the leading type byte
+  if(result != llsz){
+GBE_ASSERT(0);
+  }
+  *(*binary) = binary_type; // first byte carries the binary type
+  fclose(pfile);
+  remove(llName.c_str()); // the temporary file is no longer needed
+  return llsz+1; // payload plus the type byte
+#else
   return 0;
+#endif
 }
-
-*binary = (char *)malloc(sizeof(char) * sz);
-memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
-return sz;
   }
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
@@ -327,6 +389,7 @@ namespace gbe {
 void genSetupCallBacks(void)
 {
   gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
+  gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary;
   gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary;
   gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM;
   gbe_program_new_gen_program = gbe::genProgramNewGenProgram;
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index b4c56b7..ebaf3d3 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -1099,6 +1099,7 @@ GBE_EXPORT_SYMBOL gbe_program_new_from_source_cb 
*gbe_program_new_from_source =
 GBE_EXPORT_SYMBOL gbe_program_compile_from_source_cb 
*gbe_program_compile_from_s

[Beignet] [PATCH] add binary type support for compiled object and library.

2014-06-15 Thread xionghu . luo
From: Luo 

save the llvm bitcode to program->binary: insert a byte in front of the
bitcode that stands for the binary type (1 means COMPILED_OBJECT, 2 means LIBRARY);
load the binary to module by ParseIR.

create random directory to save compile header files.
use strncpy and strncat to replace strcpy and strcat.

Signed-off-by: Luo 

Conflicts:
src/cl_api.c
src/cl_gbe_loader.cpp
src/cl_khr_icd.c
src/cl_program.c
---
 backend/src/backend/gen_program.cpp | 77 +
 backend/src/backend/program.cpp |  1 +
 backend/src/backend/program.h   |  8 +++-
 src/cl_api.c| 25 ++--
 src/cl_gbe_loader.cpp   | 25 +++-
 src/cl_gbe_loader.h | 10 +++--
 src/cl_program.c| 71 ++
 src/cl_program.h|  1 +
 8 files changed, 185 insertions(+), 33 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 300741e..2ef8307 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -35,6 +35,12 @@
 
 #include "llvm/Linker.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/IRReader/IRReader.h"
 
 #include "backend/program.h"
 #include "backend/gen_program.h"
@@ -55,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -203,20 +210,75 @@ namespace gbe {
 return reinterpret_cast(program);
   }
 
-  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary) {
+  // Rebuild a GenProgram from a serialized LLVM binary blob.
+  //
+  // Expected layout of `binary`: one leading byte holding the binary type,
+  // followed by the LLVM payload that is handed to ParseIR.
+  //
+  //   deviceID - device the new program is created for
+  //   binary   - serialized blob (type byte + payload)
+  //   size     - total size of `binary` in bytes
+  //
+  // Returns the new program, or NULL when the blob is too short, cannot be
+  // parsed, or GBE_COMPILER_AVAILABLE is not defined.
+  static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) {
+#ifdef GBE_COMPILER_AVAILABLE
+    using namespace gbe;
+    // A usable blob needs the type byte plus some payload. Without this
+    // guard size-1 wraps around when size is 0.
+    if (binary == NULL || size < 2)
+      return NULL;
+
+    std::string binary_content;
+    //the first byte stands for binary_type.
+    binary_content.assign(binary+1, size-1);
+    llvm::StringRef llvm_bin_str(binary_content);
+    llvm::LLVMContext& c = llvm::getGlobalContext();
+    llvm::SMDiagnostic Err;
+    llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+    llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c);
+    // Never wrap a module that failed to parse.
+    if (module == NULL)
+      return NULL;
+
+    GenProgram *program = GBE_NEW(GenProgram, deviceID, module);
+
+    //program->printStatus(0, std::cout);
+    return reinterpret_cast<gbe_program>(program);
+#else
+    return NULL;
+#endif
+  }
+
+  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary, int binary_type) { // Serializes `program` into a malloc'ed buffer stored in *binary; returns the buffer size in bytes.
 using namespace gbe;
 size_t sz;
 std::ostringstream oss;
 GenProgram *prog = (GenProgram*)program;
 
-if ((sz = prog->serializeToBin(oss)) == 0) {
-  *binary = 0;
+//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM 
bitcode library
+if(binary_type == 0){ // GEN binary: raw output of serializeToBin(), no type prefix
+  if ((sz = prog->serializeToBin(oss)) == 0) {
+*binary = 0;
+return 0; // nothing was serialized
+  }
+
+  *binary = (char *)malloc(sizeof(char) * sz);
+  memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
+  return sz;
+}else{
+#ifdef GBE_COMPILER_AVAILABLE
+  char llStr[] = "/tmp/XX.ll"; // NOTE(review): mkstemps() expects six X's before the 3-char suffix; the template looks truncated - confirm against the original patch
+  int llFd = mkstemps(llStr, 3);
+  close(llFd); // only the generated file name is used; the file is reopened by name below
+  const std::string llName = std::string(llStr);
+  std::string errorInfo;
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR > 3)
+  auto mode = llvm::sys::fs::F_Binary;
+#else
+  auto mode = llvm::raw_fd_ostream::F_Binary;
+#endif
+  llvm::raw_fd_ostream OS(llName.c_str(), errorInfo, mode);
+  llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
+  OS.close();
+  FILE* pfile = fopen(llName.c_str(), "rb"); // NOTE(review): the fopen, ftell and malloc results below are used unchecked
+  fseek(pfile, 0, SEEK_END);
+  int llsz = ftell(pfile); // size of the bitcode file just written
+  rewind(pfile);
+  *binary = (char *)malloc(sizeof(char) * (llsz+1) ); // one extra byte for the binary type
+  int result = fread(*binary+1, 1, llsz, pfile); // bitcode is placed after the leading type byte
+  if(result != llsz){
+GBE_ASSERT(0);
+  }
+  *(*binary) = binary_type; // first byte carries the binary type
+  fclose(pfile);
+  remove(llName.c_str()); // the temporary file is no longer needed
+  return llsz+1; // payload plus the type byte
+#else
   return 0;
+#endif
 }
-
-*binary = (char *)malloc(sizeof(char) * sz);
-memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
-return sz;
   }
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
@@ -337,6 +399,7 @@ namespace gbe {
 void genSetupCallBacks(void)
 {
   gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
+  gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary;
   gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary;
   gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM;
   gbe_program_new_gen_program = gbe::genProgramNewGenProgram;
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 45983fd..98e7ab7 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ 

[Beignet] [PATCH V2] add binary type support for compiled object and library.

2014-06-16 Thread xionghu . luo
From: Luo 

save the llvm bitcode to program->binary: insert a byte in front of the
bitcode that stands for the binary type (1 means COMPILED_OBJECT, 2 means LIBRARY);
load the binary to module by ParseIR.

create random directory to save compile header files.
use strncpy and strncat to replace strcpy and strcat.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 68 ++-
 backend/src/backend/program.cpp |  1 +
 backend/src/backend/program.h   |  8 +++--
 src/cl_api.c| 25 +++--
 src/cl_gbe_loader.cpp   | 11 --
 src/cl_gbe_loader.h | 10 +++---
 src/cl_program.c| 72 +
 src/cl_program.h|  1 +
 8 files changed, 169 insertions(+), 27 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 300741e..b31 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -35,6 +35,12 @@
 
 #include "llvm/Linker.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/IRReader/IRReader.h"
 
 #include "backend/program.h"
 #include "backend/gen_program.h"
@@ -55,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -188,7 +195,7 @@ namespace gbe {
   static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char 
*binary, size_t size) {
 using namespace gbe;
 std::string binary_content;
-binary_content.assign(binary, size);
+binary_content.assign(binary+5, size-5);
 GenProgram *program = GBE_NEW(GenProgram, deviceID);
 std::istringstream ifs(binary_content, std::ostringstream::binary);
 // FIXME we need to check the whether the current device ID match the 
binary file's.
@@ -203,20 +210,64 @@ namespace gbe {
 return reinterpret_cast(program);
   }
 
-  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary) {
+  // Rebuild a GenProgram from a serialized LLVM binary blob.
+  //
+  // Expected layout of `binary`: one leading byte holding the binary type,
+  // followed by the LLVM payload that is handed to ParseIR.
+  //
+  //   deviceID - device the new program is created for
+  //   binary   - serialized blob (type byte + payload)
+  //   size     - total size of `binary` in bytes
+  //
+  // Returns the new program, or NULL when the blob is too short, cannot be
+  // parsed, or GBE_COMPILER_AVAILABLE is not defined.
+  static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) {
+#ifdef GBE_COMPILER_AVAILABLE
+    using namespace gbe;
+    // A usable blob needs the type byte plus some payload. Without this
+    // guard size-1 wraps around when size is 0.
+    if (binary == NULL || size < 2)
+      return NULL;
+
+    std::string binary_content;
+    //the first byte stands for binary_type.
+    binary_content.assign(binary+1, size-1);
+    llvm::StringRef llvm_bin_str(binary_content);
+    llvm::LLVMContext& c = llvm::getGlobalContext();
+    llvm::SMDiagnostic Err;
+    llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+    llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c);
+    if(module == NULL){
+      GBE_ASSERT(0);
+      // Still bail out if the assertion does not stop execution, so a NULL
+      // module is never wrapped in a program.
+      return NULL;
+    }
+
+    GenProgram *program = GBE_NEW(GenProgram, deviceID, module);
+
+    //program->printStatus(0, std::cout);
+    return reinterpret_cast<gbe_program>(program);
+#else
+    return NULL;
+#endif
+  }
+
+  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary, int binary_type) { // Serializes `program` into a malloc'ed buffer stored in *binary; returns the buffer size in bytes.
 using namespace gbe;
 size_t sz;
 std::ostringstream oss;
 GenProgram *prog = (GenProgram*)program;
 
-if ((sz = prog->serializeToBin(oss)) == 0) {
-  *binary = 0;
+//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM 
bitcode library
+if(binary_type == 0){
+  if ((sz = prog->serializeToBin(oss)) == 0) {
+*binary = NULL;
+return 0; // nothing was serialized
+  }
+
+  //add header to differentiate from llvm bitcode binary.
+  //the header length is 5 bytes: 1 binary type, 4 bitcode header.
+  *binary = (char *)malloc(sizeof(char) * (sz+5) ); // NOTE(review): malloc result is used unchecked
+  memset(*binary, 0, sizeof(char) * (sz+5) ); // leaves the 5 header bytes at zero
+  memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); // GEN payload follows the header
+  return sz+5;
+}else{
+#ifdef GBE_COMPILER_AVAILABLE
+  std::string str;
+  llvm::raw_string_ostream OS(str); // bitcode is written into `str` in memory
+  llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
+  std::string& bin_str = OS.str();
+  int llsz = bin_str.size(); // NOTE(review): size_t narrowed to int
+  *binary = (char *)malloc(sizeof(char) * (llsz+1) ); // NOTE(review): malloc result is used unchecked
+  *(*binary) = binary_type; // first byte carries the binary type
+  memcpy(*binary+1, bin_str.c_str(), llsz); // bitcode follows the type byte
+  return llsz+1; // payload plus the type byte
+#else
   return 0;
+#endif
 }
-
-*binary = (char *)malloc(sizeof(char) * sz);
-memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
-return sz;
   }
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
@@ -337,6 +388,7 @@ namespace gbe {
 void genSetupCallBacks(void)
 {
   gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
+  gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary;
   gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary;
   gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM;
   gbe_program_new_gen_program = gbe::genProgramNewGenProgram;
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 45983fd..98e7

[Beignet] [PATCH V2] add binary type support for compiled object and library.

2014-06-16 Thread xionghu . luo
From: Luo 

save the llvm bitcode to program->binary: insert a byte in front of the
bitcode that stands for the binary type (1 means COMPILED_OBJECT, 2 means LIBRARY);
load the binary to module by ParseIR.

create random directory to save compile header files.
use strncpy and strncat to replace strcpy and strcat.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 68 ++-
 backend/src/backend/program.cpp |  1 +
 backend/src/backend/program.h   |  8 +++--
 src/cl_api.c| 25 +++--
 src/cl_gbe_loader.cpp   | 11 --
 src/cl_gbe_loader.h | 10 +++---
 src/cl_program.c| 72 +
 src/cl_program.h|  1 +
 8 files changed, 169 insertions(+), 27 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 300741e..b31 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -35,6 +35,12 @@
 
 #include "llvm/Linker.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/IRReader/IRReader.h"
 
 #include "backend/program.h"
 #include "backend/gen_program.h"
@@ -55,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -188,7 +195,7 @@ namespace gbe {
   static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char 
*binary, size_t size) {
 using namespace gbe;
 std::string binary_content;
-binary_content.assign(binary, size);
+binary_content.assign(binary+5, size-5);
 GenProgram *program = GBE_NEW(GenProgram, deviceID);
 std::istringstream ifs(binary_content, std::ostringstream::binary);
 // FIXME we need to check the whether the current device ID match the 
binary file's.
@@ -203,20 +210,64 @@ namespace gbe {
 return reinterpret_cast(program);
   }
 
-  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary) {
+  // Rebuild a GenProgram from a serialized LLVM binary blob.
+  //
+  // Expected layout of `binary`: one leading byte holding the binary type,
+  // followed by the LLVM payload that is handed to ParseIR.
+  //
+  //   deviceID - device the new program is created for
+  //   binary   - serialized blob (type byte + payload)
+  //   size     - total size of `binary` in bytes
+  //
+  // Returns the new program, or NULL when the blob is too short, cannot be
+  // parsed, or GBE_COMPILER_AVAILABLE is not defined.
+  static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) {
+#ifdef GBE_COMPILER_AVAILABLE
+    using namespace gbe;
+    // A usable blob needs the type byte plus some payload. Without this
+    // guard size-1 wraps around when size is 0.
+    if (binary == NULL || size < 2)
+      return NULL;
+
+    std::string binary_content;
+    //the first byte stands for binary_type.
+    binary_content.assign(binary+1, size-1);
+    llvm::StringRef llvm_bin_str(binary_content);
+    llvm::LLVMContext& c = llvm::getGlobalContext();
+    llvm::SMDiagnostic Err;
+    llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+    llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c);
+    if(module == NULL){
+      GBE_ASSERT(0);
+      // Still bail out if the assertion does not stop execution, so a NULL
+      // module is never wrapped in a program.
+      return NULL;
+    }
+
+    GenProgram *program = GBE_NEW(GenProgram, deviceID, module);
+
+    //program->printStatus(0, std::cout);
+    return reinterpret_cast<gbe_program>(program);
+#else
+    return NULL;
+#endif
+  }
+
+  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary, int binary_type) { // Serializes `program` into a malloc'ed buffer stored in *binary; returns the buffer size in bytes.
 using namespace gbe;
 size_t sz;
 std::ostringstream oss;
 GenProgram *prog = (GenProgram*)program;
 
-if ((sz = prog->serializeToBin(oss)) == 0) {
-  *binary = 0;
+//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM 
bitcode library
+if(binary_type == 0){
+  if ((sz = prog->serializeToBin(oss)) == 0) {
+*binary = NULL;
+return 0; // nothing was serialized
+  }
+
+  //add header to differentiate from llvm bitcode binary.
+  //the header length is 5 bytes: 1 binary type, 4 bitcode header.
+  *binary = (char *)malloc(sizeof(char) * (sz+5) ); // NOTE(review): malloc result is used unchecked
+  memset(*binary, 0, sizeof(char) * (sz+5) ); // leaves the 5 header bytes at zero
+  memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); // GEN payload follows the header
+  return sz+5;
+}else{
+#ifdef GBE_COMPILER_AVAILABLE
+  std::string str;
+  llvm::raw_string_ostream OS(str); // bitcode is written into `str` in memory
+  llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
+  std::string& bin_str = OS.str();
+  int llsz = bin_str.size(); // NOTE(review): size_t narrowed to int
+  *binary = (char *)malloc(sizeof(char) * (llsz+1) ); // NOTE(review): malloc result is used unchecked
+  *(*binary) = binary_type; // first byte carries the binary type
+  memcpy(*binary+1, bin_str.c_str(), llsz); // bitcode follows the type byte
+  return llsz+1; // payload plus the type byte
+#else
   return 0;
+#endif
 }
-
-*binary = (char *)malloc(sizeof(char) * sz);
-memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
-return sz;
   }
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
@@ -337,6 +388,7 @@ namespace gbe {
 void genSetupCallBacks(void)
 {
   gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
+  gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary;
   gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary;
   gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM;
   gbe_program_new_gen_program = gbe::genProgramNewGenProgram;
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 45983fd..98e7

[Beignet] [PATCH V3] add binary type support for compiled object and library.

2014-06-16 Thread xionghu . luo
From: Luo 

save the llvm bitcode to program->binary: insert a byte in front of the
bitcode that stands for the binary type (0 means GEN binary, 1 means
COMPILED_OBJECT, 2 means LIBRARY);
load the binary to module by ParseIR.

create random directory to save compile header files.
use strncpy and strncat to replace strcpy and strcat.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 71 +++-
 backend/src/backend/program.cpp |  1 +
 backend/src/backend/program.h   |  8 +++--
 src/cl_api.c| 25 +++--
 src/cl_gbe_loader.cpp   | 11 --
 src/cl_gbe_loader.h | 10 +++---
 src/cl_program.c| 72 +
 src/cl_program.h|  1 +
 8 files changed, 172 insertions(+), 27 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 300741e..8897dbb 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -35,6 +35,12 @@
 
 #include "llvm/Linker.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/IRReader/IRReader.h"
 
 #include "backend/program.h"
 #include "backend/gen_program.h"
@@ -55,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -188,7 +195,8 @@ namespace gbe {
   static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char 
*binary, size_t size) {
 using namespace gbe;
 std::string binary_content;
-binary_content.assign(binary, size);
+//the first 5 bytes are header to differentiate from llvm bitcode binary.
+binary_content.assign(binary+5, size-5);
 GenProgram *program = GBE_NEW(GenProgram, deviceID);
 std::istringstream ifs(binary_content, std::ostringstream::binary);
 // FIXME we need to check the whether the current device ID match the 
binary file's.
@@ -203,20 +211,66 @@ namespace gbe {
 return reinterpret_cast(program);
   }
 
-  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary) {
+  // Rebuild a GenProgram from a serialized LLVM binary blob.
+  //
+  // Expected layout of `binary`: one leading byte holding the binary type,
+  // followed by the LLVM payload that is handed to ParseIR.
+  //
+  //   deviceID - device the new program is created for
+  //   binary   - serialized blob (type byte + payload)
+  //   size     - total size of `binary` in bytes
+  //
+  // Returns the new program, or NULL when the blob is too short, cannot be
+  // parsed, or GBE_COMPILER_AVAILABLE is not defined.
+  static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char *binary, size_t size) {
+#ifdef GBE_COMPILER_AVAILABLE
+    using namespace gbe;
+    // A usable blob needs the type byte plus some payload. Without this
+    // guard size-1 wraps around when size is 0.
+    if (binary == NULL || size < 2)
+      return NULL;
+
+    std::string binary_content;
+    //the first byte stands for binary_type.
+    binary_content.assign(binary+1, size-1);
+    llvm::StringRef llvm_bin_str(binary_content);
+    llvm::LLVMContext& c = llvm::getGlobalContext();
+    llvm::SMDiagnostic Err;
+    llvm::MemoryBuffer* memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+    // Parsing runs under the LLVM context lock.
+    acquireLLVMContextLock();
+    llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c);
+    releaseLLVMContextLock();
+    if(module == NULL){
+      GBE_ASSERT(0);
+      // Still bail out if the assertion does not stop execution, so a NULL
+      // module is never wrapped in a program.
+      return NULL;
+    }
+
+    GenProgram *program = GBE_NEW(GenProgram, deviceID, module);
+
+    //program->printStatus(0, std::cout);
+    return reinterpret_cast<gbe_program>(program);
+#else
+    return NULL;
+#endif
+  }
+
+  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary, int binary_type) { // Serializes `program` into a malloc'ed buffer stored in *binary; returns the buffer size in bytes.
 using namespace gbe;
 size_t sz;
 std::ostringstream oss;
 GenProgram *prog = (GenProgram*)program;
 
-if ((sz = prog->serializeToBin(oss)) == 0) {
-  *binary = 0;
+//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM 
bitcode library
+if(binary_type == 0){
+  if ((sz = prog->serializeToBin(oss)) == 0) {
+*binary = NULL;
+return 0; // nothing was serialized
+  }
+
+  //add header to differentiate from llvm bitcode binary.
+  //the header length is 5 bytes: 1 binary type, 4 bitcode header.
+  *binary = (char *)malloc(sizeof(char) * (sz+5) ); // NOTE(review): malloc result is used unchecked
+  memset(*binary, 0, sizeof(char) * (sz+5) ); // leaves the 5 header bytes at zero
+  memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char)); // GEN payload follows the header
+  return sz+5;
+}else{
+#ifdef GBE_COMPILER_AVAILABLE
+  std::string str;
+  llvm::raw_string_ostream OS(str); // bitcode is written into `str` in memory
+  llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
+  std::string& bin_str = OS.str();
+  int llsz = bin_str.size(); // NOTE(review): size_t narrowed to int
+  *binary = (char *)malloc(sizeof(char) * (llsz+1) ); // NOTE(review): malloc result is used unchecked
+  *(*binary) = binary_type; // first byte carries the binary type
+  memcpy(*binary+1, bin_str.c_str(), llsz); // bitcode follows the type byte
+  return llsz+1; // payload plus the type byte
+#else
   return 0;
+#endif
 }
-
-*binary = (char *)malloc(sizeof(char) * sz);
-memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
-return sz;
   }
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
@@ -337,6 +391,7 @@ namespace gbe {
 void genSetupCallBacks(void)
 {
   gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
+  gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary;
   gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary;
   gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM

[Beignet] [PATCH] fix clEnqueueMarkerWithWaitList bug when input event is null.

2014-06-17 Thread xionghu . luo
From: Luo 

---
 src/cl_api.c   |  5 +
 src/cl_event.c | 11 ---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 2f287e3..c4a8730 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2992,10 +2992,7 @@ clEnqueueMarkerWithWaitList(cl_command_queue 
command_queue,
 {
   cl_int err = CL_SUCCESS;
   CHECK_QUEUE(command_queue);
-  if(event == NULL) {
-err = CL_INVALID_VALUE;
-goto error;
-  }
+
   TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, command_queue->ctx);
 
   cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, 
event_wait_list, event);
diff --git a/src/cl_event.c b/src/cl_event.c
index c93d245..76d6760 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -476,11 +476,16 @@ cl_int cl_event_marker_with_wait_list(cl_command_queue 
queue,
 cl_event* event)
 {
   enqueue_data data = { 0 };
+  cl_event e;
 
-  *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
-  if(event == NULL)
+  e = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
+  if(e == NULL)
 return CL_OUT_OF_HOST_MEMORY;
 
+  if(event != NULL ){
+*event = e;
+  }
+
 //enqueues a marker command which waits for either a list of events to 
complete, or if the list is
 //empty it waits for all commands previously enqueued in command_queue to 
complete before it  completes.
   if(num_events_in_wait_list > 0){
@@ -499,7 +504,7 @@ cl_int cl_event_marker_with_wait_list(cl_command_queue 
queue,
 cl_gpgpu_event_update_status(queue->last_event->gpgpu_event, 1);
   }
 
-  cl_event_set_status(*event, CL_COMPLETE);
+  cl_event_set_status(e, CL_COMPLETE);
   return CL_SUCCESS;
 }
 
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V4] add binary type support for compiled object and library.

2014-06-17 Thread xionghu . luo
From: Luo 

save the llvm bitcode to program->binary: insert a byte in front of the
bitcode that stands for the binary type (0 means GEN binary, 1 means
COMPILED_OBJECT, 2 means LIBRARY);
load the binary to module by ParseIR.

create random directory to save compile header files.
use strncpy and strncat to replace strcpy and strcat.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 71 +++-
 backend/src/backend/program.cpp |  1 +
 backend/src/backend/program.h   |  8 +++--
 backend/src/gbe_bin_generater.cpp   | 15 +++-
 src/cl_api.c| 25 +++--
 src/cl_gbe_loader.cpp   | 11 --
 src/cl_gbe_loader.h | 10 +++---
 src/cl_program.c| 72 +
 src/cl_program.h|  1 +
 9 files changed, 186 insertions(+), 28 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 300741e..8897dbb 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -35,6 +35,12 @@
 
 #include "llvm/Linker.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/IRReader/IRReader.h"
 
 #include "backend/program.h"
 #include "backend/gen_program.h"
@@ -55,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -188,7 +195,8 @@ namespace gbe {
   static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char 
*binary, size_t size) {
 using namespace gbe;
 std::string binary_content;
-binary_content.assign(binary, size);
+//the first 5 bytes are header to differentiate from llvm bitcode binary.
+binary_content.assign(binary+5, size-5);
 GenProgram *program = GBE_NEW(GenProgram, deviceID);
 std::istringstream ifs(binary_content, std::ostringstream::binary);
 // FIXME we need to check the whether the current device ID match the 
binary file's.
@@ -203,20 +211,66 @@ namespace gbe {
 return reinterpret_cast(program);
   }
 
-  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary) {
+  static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char 
*binary, size_t size) {
+#ifdef GBE_COMPILER_AVAILABLE
+using namespace gbe;
+std::string binary_content;
+//the first byte stands for binary_type.
+binary_content.assign(binary+1, size-1);
+llvm::StringRef llvm_bin_str(binary_content);
+llvm::LLVMContext& c = llvm::getGlobalContext();
+llvm::SMDiagnostic Err;
+llvm::MemoryBuffer* memory_buffer = 
llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+acquireLLVMContextLock();
+llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c);
+releaseLLVMContextLock();
+if(module == NULL){
+  GBE_ASSERT(0);
+}
+
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module);
+
+//program->printStatus(0, std::cout);
+return reinterpret_cast(program);
+#else
+  return NULL;
+#endif
+  }
+
+  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary, int binary_type) {
 using namespace gbe;
 size_t sz;
 std::ostringstream oss;
 GenProgram *prog = (GenProgram*)program;
 
-if ((sz = prog->serializeToBin(oss)) == 0) {
-  *binary = 0;
+//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM 
bitcode library
+if(binary_type == 0){
+  if ((sz = prog->serializeToBin(oss)) == 0) {
+*binary = NULL;
+return 0;
+  }
+
+  //add header to differetiate from llvm bitcode binary.
+  //the header length is 5 bytes: 1 binary type, 4 bitcode header.
+  *binary = (char *)malloc(sizeof(char) * (sz+5) );
+  memset(*binary, 0, sizeof(char) * (sz+5) );
+  memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char));
+  return sz+5;
+}else{
+#ifdef GBE_COMPILER_AVAILABLE
+  std::string str;
+  llvm::raw_string_ostream OS(str);
+  llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
+  std::string& bin_str = OS.str();
+  int llsz = bin_str.size();
+  *binary = (char *)malloc(sizeof(char) * (llsz+1) );
+  *(*binary) = binary_type;
+  memcpy(*binary+1, bin_str.c_str(), llsz);
+  return llsz+1;
+#else
   return 0;
+#endif
 }
-
-*binary = (char *)malloc(sizeof(char) * sz);
-memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
-return sz;
   }
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
@@ -337,6 +391,7 @@ namespace gbe {
 void genSetupCallBacks(void)
 {
   gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
+  gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary;
   gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary;
   gbe

[Beignet] [PATCH V5] add binary type support for compiled object and library.

2014-06-17 Thread xionghu . luo
From: Luo 

save the llvm bitcode to program->binary: insert a byte in front of the
bitcode that stands for the binary type (0 means GEN binary, 1 means
COMPILED_OBJECT, 2 means LIBRARY);
load the binary into a module with ParseIR.

create random directory to save compile header files.
use strncpy and strncat to replace strcpy and strcat.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 71 +++-
 backend/src/backend/program.cpp |  1 +
 backend/src/backend/program.h   |  8 +++--
 backend/src/gbe_bin_generater.cpp   | 16 -
 src/cl_api.c| 25 +++--
 src/cl_gbe_loader.cpp   | 11 --
 src/cl_gbe_loader.h | 10 +++---
 src/cl_program.c| 72 +
 src/cl_program.h|  1 +
 9 files changed, 187 insertions(+), 28 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 300741e..8897dbb 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -35,6 +35,12 @@
 
 #include "llvm/Linker.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/IRReader/IRReader.h"
 
 #include "backend/program.h"
 #include "backend/gen_program.h"
@@ -55,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -188,7 +195,8 @@ namespace gbe {
   static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char 
*binary, size_t size) {
 using namespace gbe;
 std::string binary_content;
-binary_content.assign(binary, size);
+//the first 5 bytes are header to differentiate from llvm bitcode binary.
+binary_content.assign(binary+5, size-5);
 GenProgram *program = GBE_NEW(GenProgram, deviceID);
 std::istringstream ifs(binary_content, std::ostringstream::binary);
 // FIXME we need to check the whether the current device ID match the 
binary file's.
@@ -203,20 +211,66 @@ namespace gbe {
 return reinterpret_cast(program);
   }
 
-  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary) {
+  static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char 
*binary, size_t size) {
+#ifdef GBE_COMPILER_AVAILABLE
+using namespace gbe;
+std::string binary_content;
+//the first byte stands for binary_type.
+binary_content.assign(binary+1, size-1);
+llvm::StringRef llvm_bin_str(binary_content);
+llvm::LLVMContext& c = llvm::getGlobalContext();
+llvm::SMDiagnostic Err;
+llvm::MemoryBuffer* memory_buffer = 
llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+acquireLLVMContextLock();
+llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c);
+releaseLLVMContextLock();
+if(module == NULL){
+  GBE_ASSERT(0);
+}
+
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module);
+
+//program->printStatus(0, std::cout);
+return reinterpret_cast(program);
+#else
+  return NULL;
+#endif
+  }
+
+  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary, int binary_type) {
 using namespace gbe;
 size_t sz;
 std::ostringstream oss;
 GenProgram *prog = (GenProgram*)program;
 
-if ((sz = prog->serializeToBin(oss)) == 0) {
-  *binary = 0;
+//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM 
bitcode library
+if(binary_type == 0){
+  if ((sz = prog->serializeToBin(oss)) == 0) {
+*binary = NULL;
+return 0;
+  }
+
+  //add header to differetiate from llvm bitcode binary.
+  //the header length is 5 bytes: 1 binary type, 4 bitcode header.
+  *binary = (char *)malloc(sizeof(char) * (sz+5) );
+  memset(*binary, 0, sizeof(char) * (sz+5) );
+  memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char));
+  return sz+5;
+}else{
+#ifdef GBE_COMPILER_AVAILABLE
+  std::string str;
+  llvm::raw_string_ostream OS(str);
+  llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
+  std::string& bin_str = OS.str();
+  int llsz = bin_str.size();
+  *binary = (char *)malloc(sizeof(char) * (llsz+1) );
+  *(*binary) = binary_type;
+  memcpy(*binary+1, bin_str.c_str(), llsz);
+  return llsz+1;
+#else
   return 0;
+#endif
 }
-
-*binary = (char *)malloc(sizeof(char) * sz);
-memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
-return sz;
   }
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
@@ -337,6 +391,7 @@ namespace gbe {
 void genSetupCallBacks(void)
 {
   gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
+  gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary;
   gbe_program_serialize_to_binary = gbe::genProgramSerializeToBinary;
   gb

[Beignet] [PATCH v6] add binary type support for compiled object and library.

2014-06-17 Thread xionghu . luo
From: Luo 

save the llvm bitcode to program->binary: insert a byte in front of the
bitcode that stands for the binary type (0 means GEN binary, 1 means
COMPILED_OBJECT, 2 means LIBRARY);
load the binary into a module with ParseIR.

create random directory to save compile header files.
use strncpy and strncat to replace strcpy and strcat.

v6: fix enqueue_copy_fill bug, use '\0' instead of 0 in the header.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 71 +++-
 backend/src/backend/program.cpp |  1 +
 backend/src/backend/program.h   |  8 +++--
 backend/src/gbe_bin_generater.cpp   | 20 ++-
 src/cl_api.c| 25 +++--
 src/cl_gbe_loader.cpp   | 11 --
 src/cl_gbe_loader.h | 10 +++---
 src/cl_program.c| 72 +
 src/cl_program.h|  1 +
 9 files changed, 191 insertions(+), 28 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 300741e..8897dbb 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -35,6 +35,12 @@
 
 #include "llvm/Linker.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/IRReader/IRReader.h"
 
 #include "backend/program.h"
 #include "backend/gen_program.h"
@@ -55,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -188,7 +195,8 @@ namespace gbe {
   static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char 
*binary, size_t size) {
 using namespace gbe;
 std::string binary_content;
-binary_content.assign(binary, size);
+//the first 5 bytes are header to differentiate from llvm bitcode binary.
+binary_content.assign(binary+5, size-5);
 GenProgram *program = GBE_NEW(GenProgram, deviceID);
 std::istringstream ifs(binary_content, std::ostringstream::binary);
 // FIXME we need to check the whether the current device ID match the 
binary file's.
@@ -203,20 +211,66 @@ namespace gbe {
 return reinterpret_cast(program);
   }
 
-  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary) {
+  static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char 
*binary, size_t size) {
+#ifdef GBE_COMPILER_AVAILABLE
+using namespace gbe;
+std::string binary_content;
+//the first byte stands for binary_type.
+binary_content.assign(binary+1, size-1);
+llvm::StringRef llvm_bin_str(binary_content);
+llvm::LLVMContext& c = llvm::getGlobalContext();
+llvm::SMDiagnostic Err;
+llvm::MemoryBuffer* memory_buffer = 
llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+acquireLLVMContextLock();
+llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c);
+releaseLLVMContextLock();
+if(module == NULL){
+  GBE_ASSERT(0);
+}
+
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module);
+
+//program->printStatus(0, std::cout);
+return reinterpret_cast(program);
+#else
+  return NULL;
+#endif
+  }
+
+  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary, int binary_type) {
 using namespace gbe;
 size_t sz;
 std::ostringstream oss;
 GenProgram *prog = (GenProgram*)program;
 
-if ((sz = prog->serializeToBin(oss)) == 0) {
-  *binary = 0;
+//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM 
bitcode library
+if(binary_type == 0){
+  if ((sz = prog->serializeToBin(oss)) == 0) {
+*binary = NULL;
+return 0;
+  }
+
+  //add header to differetiate from llvm bitcode binary.
+  //the header length is 5 bytes: 1 binary type, 4 bitcode header.
+  *binary = (char *)malloc(sizeof(char) * (sz+5) );
+  memset(*binary, 0, sizeof(char) * (sz+5) );
+  memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char));
+  return sz+5;
+}else{
+#ifdef GBE_COMPILER_AVAILABLE
+  std::string str;
+  llvm::raw_string_ostream OS(str);
+  llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
+  std::string& bin_str = OS.str();
+  int llsz = bin_str.size();
+  *binary = (char *)malloc(sizeof(char) * (llsz+1) );
+  *(*binary) = binary_type;
+  memcpy(*binary+1, bin_str.c_str(), llsz);
+  return llsz+1;
+#else
   return 0;
+#endif
 }
-
-*binary = (char *)malloc(sizeof(char) * sz);
-memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
-return sz;
   }
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
@@ -337,6 +391,7 @@ namespace gbe {
 void genSetupCallBacks(void)
 {
   gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
+  gbe_program_new_from_llvm_binary = gbe::genProgramNewFromLLVMBinary;
   gb

[Beignet] [PATCH v7] add binary type support for compiled object and library.

2014-06-17 Thread xionghu . luo
From: Luo 

save the llvm bitcode to program->binary: insert a byte in front of the
bitcode that stands for the binary type (0 means GEN binary, 1 means
COMPILED_OBJECT, 2 means LIBRARY);
load the binary into a module with ParseIR.

create random directory to save compile header files.
use strncpy and strncat to replace strcpy and strcat.

v6: fix enqueue_copy_fill bug, use '\0' instead of 0 in the header.
v7: binary header format issue: fix the test_load_program_from_bin bug for the
standalone kernel generated by gbe_bin_generater.

Signed-off-by: Luo 
---
 backend/src/backend/gen_program.cpp | 71 +++-
 backend/src/backend/program.cpp |  1 +
 backend/src/backend/program.h   |  8 +++--
 backend/src/gbe_bin_generater.cpp   | 58 --
 src/cl_api.c| 25 +++--
 src/cl_gbe_loader.cpp   | 11 --
 src/cl_gbe_loader.h | 10 +++---
 src/cl_program.c| 72 +
 src/cl_program.h|  1 +
 9 files changed, 211 insertions(+), 46 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 300741e..8897dbb 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -35,6 +35,12 @@
 
 #include "llvm/Linker.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/IRReader/IRReader.h"
 
 #include "backend/program.h"
 #include "backend/gen_program.h"
@@ -55,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace gbe {
 
@@ -188,7 +195,8 @@ namespace gbe {
   static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char 
*binary, size_t size) {
 using namespace gbe;
 std::string binary_content;
-binary_content.assign(binary, size);
+//the first 5 bytes are header to differentiate from llvm bitcode binary.
+binary_content.assign(binary+5, size-5);
 GenProgram *program = GBE_NEW(GenProgram, deviceID);
 std::istringstream ifs(binary_content, std::ostringstream::binary);
 // FIXME we need to check the whether the current device ID match the 
binary file's.
@@ -203,20 +211,66 @@ namespace gbe {
 return reinterpret_cast(program);
   }
 
-  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary) {
+  static gbe_program genProgramNewFromLLVMBinary(uint32_t deviceID, const char 
*binary, size_t size) {
+#ifdef GBE_COMPILER_AVAILABLE
+using namespace gbe;
+std::string binary_content;
+//the first byte stands for binary_type.
+binary_content.assign(binary+1, size-1);
+llvm::StringRef llvm_bin_str(binary_content);
+llvm::LLVMContext& c = llvm::getGlobalContext();
+llvm::SMDiagnostic Err;
+llvm::MemoryBuffer* memory_buffer = 
llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+acquireLLVMContextLock();
+llvm::Module* module = llvm::ParseIR(memory_buffer, Err, c);
+releaseLLVMContextLock();
+if(module == NULL){
+  GBE_ASSERT(0);
+}
+
+GenProgram *program = GBE_NEW(GenProgram, deviceID, module);
+
+//program->printStatus(0, std::cout);
+return reinterpret_cast(program);
+#else
+  return NULL;
+#endif
+  }
+
+  static size_t genProgramSerializeToBinary(gbe_program program, char 
**binary, int binary_type) {
 using namespace gbe;
 size_t sz;
 std::ostringstream oss;
 GenProgram *prog = (GenProgram*)program;
 
-if ((sz = prog->serializeToBin(oss)) == 0) {
-  *binary = 0;
+//0 means GEN binary, 1 means LLVM bitcode compiled object, 2 means LLVM 
bitcode library
+if(binary_type == 0){
+  if ((sz = prog->serializeToBin(oss)) == 0) {
+*binary = NULL;
+return 0;
+  }
+
+  //add header to differetiate from llvm bitcode binary.
+  //the header length is 5 bytes: 1 binary type, 4 bitcode header.
+  *binary = (char *)malloc(sizeof(char) * (sz+5) );
+  memset(*binary, 0, sizeof(char) * (sz+5) );
+  memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char));
+  return sz+5;
+}else{
+#ifdef GBE_COMPILER_AVAILABLE
+  std::string str;
+  llvm::raw_string_ostream OS(str);
+  llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
+  std::string& bin_str = OS.str();
+  int llsz = bin_str.size();
+  *binary = (char *)malloc(sizeof(char) * (llsz+1) );
+  *(*binary) = binary_type;
+  memcpy(*binary+1, bin_str.c_str(), llsz);
+  return llsz+1;
+#else
   return 0;
+#endif
 }
-
-*binary = (char *)malloc(sizeof(char) * sz);
-memcpy(*binary, oss.str().c_str(), sz*sizeof(char));
-return sz;
   }
 
   static gbe_program genProgramNewFromLLVM(uint32_t deviceID,
@@ -337,6 +391,7 @@ namespace gbe {
 void genSetupCallBacks(void)
 {

[Beignet] [PATCH] [opencl-1.2] implement API clEnqueueFillImage.

2014-06-22 Thread xionghu . luo
From: Luo 

enqueues a command to fill an image object with a specified color.

fix typo cl_context_get_static_kernel_from_bin.

Signed-off-by: Luo 
---
 src/CMakeLists.txt |   4 +-
 src/cl_api.c   |  73 +
 src/cl_context.c   |   2 +-
 src/cl_context.h   |   7 +-
 src/cl_enqueue.c   |   1 +
 src/cl_enqueue.h   |   1 +
 src/cl_gt_device.h |   7 +-
 src/cl_khr_icd.c   |   2 +-
 src/cl_mem.c   | 106 -
 src/cl_mem.h   |   3 +
 src/kernels/cl_internal_fill_image_1d.cl   |  14 
 src/kernels/cl_internal_fill_image_1d_array.cl |  15 
 src/kernels/cl_internal_fill_image_2d.cl   |  15 
 src/kernels/cl_internal_fill_image_2d_array.cl |  16 
 src/kernels/cl_internal_fill_image_3d.cl   |  16 
 15 files changed, 257 insertions(+), 25 deletions(-)
 create mode 100644 src/kernels/cl_internal_fill_image_1d.cl
 create mode 100644 src/kernels/cl_internal_fill_image_1d_array.cl
 create mode 100644 src/kernels/cl_internal_fill_image_2d.cl
 create mode 100644 src/kernels/cl_internal_fill_image_2d_array.cl
 create mode 100644 src/kernels/cl_internal_fill_image_3d.cl

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8651af6..3d5ce4d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -47,7 +47,9 @@ cl_internal_copy_image_2d_to_buffer 
cl_internal_copy_image_3d_to_buffer
 cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d
 cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
 cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign
-cl_internal_fill_buf_align128)
+cl_internal_fill_buf_align128 cl_internal_fill_image_1d 
+cl_internal_fill_image_1d_array cl_internal_fill_image_2d 
+cl_internal_fill_image_2d_array cl_internal_fill_image_3d)
 set (BUILT_IN_NAME  cl_internal_built_in_kernel)
 MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
 MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
diff --git a/src/cl_api.c b/src/cl_api.c
index 32f91d7..c93957f 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1812,6 +1812,79 @@ error:
 }
 
 cl_int
+clEnqueueFillImage(cl_command_queue   command_queue,
+   cl_mem image, 
+   const void *   fill_color, 
+   const size_t * porigin, 
+   const size_t * pregion, 
+   cl_uintnum_events_in_wait_list,
+   const cl_event *   event_wait_list,
+   cl_event * event)
+{
+  cl_int err = CL_SUCCESS;
+  enqueue_data *data, no_wait_data = { 0 };
+
+  CHECK_QUEUE(command_queue);
+  CHECK_IMAGE(image, src_image);
+  FIXUP_IMAGE_REGION(src_image, pregion, region);
+  FIXUP_IMAGE_ORIGIN(src_image, porigin, origin);
+
+  if (command_queue->ctx != image->ctx) {
+err = CL_INVALID_CONTEXT;
+goto error;
+  }
+
+  if (fill_color == NULL) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+
+  if (!origin || !region || origin[0] + region[0] > src_image->w || origin[1] 
+ region[1] > src_image->h || origin[2] + region[2] > src_image->depth) {
+ err = CL_INVALID_VALUE;
+ goto error;
+  }
+
+  if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (origin[2] != 0 || 
region[2] != 1)){
+err = CL_INVALID_VALUE;
+goto error;
+  }
+
+  if (src_image->image_type == CL_MEM_OBJECT_IMAGE1D && (origin[2] != 0 
||origin[1] != 0 || region[2] != 1 || region[1] != 1)){
+err = CL_INVALID_VALUE;
+goto error;
+  }
+
+  err = cl_image_fill(command_queue, fill_color, src_image, origin, region);
+  if (err) {
+goto error;
+  }
+
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, 
event, image->ctx);
+
+  data = &no_wait_data;
+  data->type = EnqueueFillImage;
+  data->queue = command_queue;
+
+  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
+   event, data, CL_COMMAND_FILL_BUFFER) == 
CL_ENQUEUE_EXECUTE_IMM) {
+if (event && (*event)->type != CL_COMMAND_USER
+&& (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
+  cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
+}
+
+err = cl_command_queue_flush(command_queue);
+  }
+
+  if(b_output_kernel_perf)
+time_end(command_queue->ctx, "beignet internal kernel : cl_fill_image", 
"", command_queue);
+
+  return 0;
+
+ error:
+  return err;
+}
+
+cl_int
 clEnqueueFillBuffer(cl_command_queue   command_queue,
 cl_mem buffer,
 const void *   pattern,
diff --git a/src/cl_context.c b/src/cl_context.c
index 8f42a58..152faf3 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -319,7 +319,7 @@ cl_contex

[Beignet] [PATCH] add cpu copy for 1Darray and 2darray related copy APIs.

2014-06-24 Thread xionghu . luo
From: Luo 

detailed cases: 1Darray, 2Darray, 2Darrayto2D, 2Darrayto3D, 2Dto2Darray,
3Dto2Darray.

1D-to-1D copy uses the GPU copy kernel.

Signed-off-by: Luo 
---
 src/CMakeLists.txt |  4 +-
 src/cl_context.h   |  1 +
 src/cl_mem.c   | 73 +-
 src/cl_mem.h   |  4 ++
 src/kernels/cl_internal_copy_image_1d_to_1d.cl | 19 +++
 5 files changed, 97 insertions(+), 4 deletions(-)
 create mode 100644 src/kernels/cl_internal_copy_image_1d_to_1d.cl

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8651af6..82b6df0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -41,8 +41,8 @@ set (KERNEL_STR_FILES)
 set (KERNEL_NAMES cl_internal_copy_buf_align4
 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset
 cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
-cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d 
cl_internal_copy_image_3d_to_2d
-cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
+cl_internal_copy_buf_rect cl_internal_copy_image_1d_to_1d 
cl_internal_copy_image_2d_to_2d
+cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d 
cl_internal_copy_image_3d_to_3d
 cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
 cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d
 cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
diff --git a/src/cl_context.h b/src/cl_context.h
index cba0a0a..74e31c7 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -47,6 +47,7 @@ enum _cl_internal_ker_type {
   CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
   CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
   CL_ENQUEUE_COPY_BUFFER_RECT,
+  CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, //copy image 1d to image 1d
   CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d
   CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
   CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d
diff --git a/src/cl_mem.c b/src/cl_mem.c
index e0c4ec9..8bb7215 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -542,6 +542,38 @@ cl_mem_copy_image_region(const size_t *origin, const 
size_t *region,
   }
 }
 
+void
+cl_mem_copy_image_to_image(const size_t *dst_origin,const size_t *src_origin, 
const size_t *region,
+   const struct _cl_mem_image *dst_image, const struct 
_cl_mem_image *src_image)
+{
+  //printf("origin:%u,%u,%u to %u,%u,%u\n", src_origin[0],src_origin[1], 
src_origin[2], dst_origin[0],dst_origin[1], dst_origin[2]);
+  //printf("region:%u,%u,%u \n", region[0],region[1], region[2]);
+  //printf("pitch:%u,%u to %u,%u\n", src_image->row_pitch, 
src_image->slice_pitch,dst_image->row_pitch, dst_image->slice_pitch);
+
+  char* dst= cl_mem_map_auto((cl_mem)dst_image);
+  char* src= cl_mem_map_auto((cl_mem)src_image);
+  size_t dst_offset = dst_image->bpp * dst_origin[0] + dst_image->row_pitch * 
dst_origin[1] + dst_image->slice_pitch * dst_origin[2];
+  size_t src_offset = src_image->bpp * src_origin[0] + src_image->row_pitch * 
src_origin[1] + src_image->slice_pitch * src_origin[2];
+  dst= (char*)dst+ dst_offset;
+  src= (char*)src+ src_offset;
+  cl_uint y, z;
+  for (z = 0; z < region[2]; z++) {
+const char* src_ptr = src;
+char* dst_ptr = dst;
+for (y = 0; y < region[1]; y++) {
+  memcpy(dst_ptr, src_ptr, src_image->bpp*region[0]);
+  src_ptr += src_image->row_pitch;
+  dst_ptr += dst_image->row_pitch;
+}
+src = (char*)src + src_image->slice_pitch;
+dst = (char*)dst + dst_image->slice_pitch;
+  }
+
+  cl_mem_unmap_auto((cl_mem)src_image);
+  cl_mem_unmap_auto((cl_mem)dst_image);
+
+}
+
 static void
 cl_mem_copy_image(struct _cl_mem_image *image,
  size_t row_pitch,
@@ -1377,7 +1409,16 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct 
_cl_mem_image* src_image
   assert(src_image->base.ctx == dst_image->base.ctx);
 
   /* setup the kernel and run. */
-  if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
+  if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D) {
+if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D) {
+  extern char cl_internal_copy_image_1d_to_1d_str[];
+  extern int cl_internal_copy_image_1d_to_1d_str_size;
+
+  ker = cl_context_get_static_kernel_form_bin(queue->ctx, 
CL_ENQUEUE_COPY_IMAGE_1D_TO_1D,
+  cl_internal_copy_image_1d_to_1d_str, 
(size_t)cl_internal_copy_image_1d_to_1d_str_size, NULL);
+}
+
+  }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
 if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
   extern char cl_internal_copy_image_2d_to_2d_str[];
   extern int cl_internal_copy_image_2d_to_2d_str_size;
@@ -1390,8 +1431,33 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct 
_cl_mem_image* src_image
 
   ker = cl_context_get_static_kernel_form_bin(queue->ctx, 
CL_ENQUEUE_COPY_IMAG

[Beignet] [PATCH] fix enqueue_built_in_kernels bug.

2014-06-24 Thread xionghu . luo
From: Luo 

need to assign the length to a local variable.

Signed-off-by: Luo 
---
 src/cl_gt_device.h | 1 +
 src/cl_program.c   | 8 +---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index d7855cd..63c9047 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -87,6 +87,7 @@ DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;"
"__cl_copy_region_unalign_dst_offset;"
"__cl_copy_region_unalign_src_offset;"
"__cl_copy_buffer_rect;"
+   "__cl_copy_image_1d_to_1d;"
"__cl_copy_image_2d_to_2d;"
"__cl_copy_image_3d_to_2d;"
"__cl_copy_image_2d_to_3d;"
diff --git a/src/cl_program.c b/src/cl_program.c
index 7888a8f..13867e0 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -213,12 +213,14 @@ cl_program_create_from_binary(cl_context ctx,
 goto error;
   }
 
+  int length = (int)lengths[0];
+
   program = cl_program_new(ctx);
 
   // TODO:  Need to check the binary format here to return CL_INVALID_BINARY.
-  TRY_ALLOC(program->binary, cl_calloc(lengths[0], sizeof(char)));
-  memcpy(program->binary, binaries[0], lengths[0]);
-  program->binary_sz = lengths[0];
+  TRY_ALLOC(program->binary, cl_calloc(length, sizeof(char)));
+  memcpy(program->binary, binaries[0], length);
+  program->binary_sz = length;
   program->source_type = FROM_BINARY;
 
   if(isBitcode((unsigned char*)program->binary+1, (unsigned 
char*)program->binary+program->binary_sz)) {
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V2] fix enqueue_built_in_kernels bug. add image_1d_to_1d builtin kernel name.

2014-06-24 Thread xionghu . luo
From: Luo 

need to assign the length to a local variable.

v2: bug was a false alarm. the case passed after regenerate cmake folder.

Signed-off-by: Luo 
---
 src/cl_gt_device.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index d7855cd..63c9047 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -87,6 +87,7 @@ DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;"
"__cl_copy_region_unalign_dst_offset;"
"__cl_copy_region_unalign_src_offset;"
"__cl_copy_buffer_rect;"
+   "__cl_copy_image_1d_to_1d;"
"__cl_copy_image_2d_to_2d;"
"__cl_copy_image_3d_to_2d;"
"__cl_copy_image_2d_to_3d;"
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] add the usage of link program from llvm binary.

2014-06-26 Thread xionghu . luo
From: Luo 

user A could compile and link kernel source to an llvm binary first, then
query the binary and save it to a file; with the binary, user B can call
clCreateProgramWithBinary without compiling the source again.
This usage helps those who need to protect the kernel source.

Signed-off-by: Luo 
---
 utests/runtime_compile_link.cpp | 41 ++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/utests/runtime_compile_link.cpp b/utests/runtime_compile_link.cpp
index 8aeea31..f27b1dd 100644
--- a/utests/runtime_compile_link.cpp
+++ b/utests/runtime_compile_link.cpp
@@ -67,12 +67,47 @@ void runtime_compile_link(void)
 
   OCL_ASSERT(err==CL_SUCCESS);
   cl_program input_programs[2] = { program_A, program_B};
-  cl_program linked_program = clLinkProgram(ctx, 0, NULL, NULL, 2, 
input_programs, NULL, NULL, &err);
-
+  cl_program linked_program = clLinkProgram(ctx, 0, NULL, "-create-library", 
2, input_programs, NULL, NULL, &err);
 
   OCL_ASSERT(linked_program != NULL);
   OCL_ASSERT(err == CL_SUCCESS);
+  size_t  binarySize;
+  unsigned char *binary;
+
+  // Get the size of the resulting binary (only one device)
+  err= clGetProgramInfo( linked_program, CL_PROGRAM_BINARY_SIZES, sizeof( 
binarySize ), &binarySize, NULL );
+  OCL_ASSERT(err==CL_SUCCESS);
+
+  // Create a buffer and get the actual binary
+  binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
+  if (binary == NULL) {
+OCL_ASSERT(0);
+return ;
+  }
+
+  unsigned char *buffers[ 1 ] = { binary };
+  // Do another sanity check here first
+  size_t size;
+  cl_int loadErrors[ 1 ];
+  err = clGetProgramInfo( linked_program, CL_PROGRAM_BINARIES, 0, NULL, &size 
);
+  OCL_ASSERT(err==CL_SUCCESS);
+  if( size != sizeof( buffers ) ){
+free(binary);
+return ;
+  }
+
+  err = clGetProgramInfo( linked_program, CL_PROGRAM_BINARIES, sizeof( buffers 
), &buffers, NULL );
+  OCL_ASSERT(err==CL_SUCCESS);
+
+  cl_device_id deviceID;
+  err = clGetProgramInfo( linked_program, CL_PROGRAM_DEVICES, sizeof( 
deviceID), &deviceID, NULL );
+  OCL_ASSERT(err==CL_SUCCESS);
+
+  cl_program program_with_binary = clCreateProgramWithBinary(ctx, 1, 
&deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &err);
+  OCL_ASSERT(err==CL_SUCCESS);
 
+  cl_program my_newly_linked_program = clLinkProgram(ctx, 1, &deviceID, NULL, 
1, &program_with_binary, NULL, NULL, &err);
+  OCL_ASSERT(err==CL_SUCCESS);
   // link success, run this kernel.
 
   const size_t n = 16;
@@ -104,7 +139,7 @@ void runtime_compile_link(void)
   OCL_UNMAP_BUFFER(0);
   OCL_UNMAP_BUFFER(1);
 
-  kernel = clCreateKernel(linked_program, "runtime_compile_link_a", &err);
+  kernel = clCreateKernel(my_newly_linked_program, "runtime_compile_link_a", 
&err);
 
   OCL_ASSERT(err == CL_SUCCESS);
 
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] remove lspci, gbe_bin_genenrater would generator llvm binary by default.

2014-07-07 Thread xionghu . luo
From: Luo 

The driver can get the chipset id via ioctl instead of calling lspci in cmake;
users can generate a gen binary by configuring the cmake option
-DGEN_PCI_ID= or by calling gbe_bin_generater with the option -t
GEN_PCI_ID.

Signed-off-by: Luo 
---
 backend/src/gbe_bin_generater.cpp   | 70 -
 src/CMakeLists.txt  | 32 +++--
 src/GetGenID.sh | 26 --
 src/cl_program.c| 22 +---
 utests/CMakeLists.txt   | 17 ++---
 utests/enqueue_built_in_kernels.cpp |  1 -
 6 files changed, 89 insertions(+), 79 deletions(-)
 delete mode 100755 src/GetGenID.sh

diff --git a/backend/src/gbe_bin_generater.cpp 
b/backend/src/gbe_bin_generater.cpp
index 925ba93..17e8a7e 100644
--- a/backend/src/gbe_bin_generater.cpp
+++ b/backend/src/gbe_bin_generater.cpp
@@ -156,19 +156,21 @@ void program_build_instance::serialize_program(void) 
throw(int)
 {
 ofstream ofs;
 ostringstream oss;
-size_t sz, header_sz = 0;
+size_t sz = 0, header_sz = 0;
 ofs.open(bin_path, ofstream::out | ofstream::trunc | ofstream::binary);
 
-//add header to differeciate from llvm bitcode binary.
-// (5 bytes: 1 byte for binary type, 4 byte for bc code.)
-char header = '\0';
-
 if (str_fmt_out) {
-  OUTS_UPDATE_SZ(header);
-  OUTS_UPDATE_SZ(header);
-  OUTS_UPDATE_SZ(header);
-  OUTS_UPDATE_SZ(header);
-  OUTS_UPDATE_SZ(header);
+
+  if(gen_pci_id){
+//add header to differeciate from llvm bitcode binary.
+// (5 bytes: 1 byte for binary type, 4 byte for bc code.)
+char header = '\0';
+OUTS_UPDATE_SZ(header);
+OUTS_UPDATE_SZ(header);
+OUTS_UPDATE_SZ(header);
+OUTS_UPDATE_SZ(header);
+OUTS_UPDATE_SZ(header);
+  }
 
   string array_name = "Unkown_name_array";
   unsigned long last_slash = bin_path.rfind("/");
@@ -180,9 +182,15 @@ void program_build_instance::serialize_program(void) 
throw(int)
   ofs << "#include " << "\n";
   ofs << "char " << array_name << "[] = {" << "\n";
 
-  sz = gbe_prog->serializeToBin(oss);
-
-  sz+=5;
+  if(gen_pci_id){
+sz = gbe_prog->serializeToBin(oss);
+sz+= header_sz;
+  }else{
+char *llvm_binary;
+size_t bin_length = 
gbe_program_serialize_to_binary((gbe_program)gbe_prog, &llvm_binary, 1);
+oss.write(llvm_binary, bin_length);
+sz += bin_length;
+  }
 
   for (size_t i = 0; i < sz; i++) {
 unsigned char c = oss.str().c_str()[i];
@@ -191,18 +199,27 @@ void program_build_instance::serialize_program(void) 
throw(int)
 ofs << "0x";
 ofs << asic_str << ((i == sz - 1) ? "" : ", ");
   }
-
   ofs << "};\n";
 
   string array_size = array_name + "_size";
   ofs << "size_t " << array_size << " = " << sz << ";" << "\n";
 } else {
-  OUTF_UPDATE_SZ(header);
-  OUTF_UPDATE_SZ(header);
-  OUTF_UPDATE_SZ(header);
-  OUTF_UPDATE_SZ(header);
-  OUTF_UPDATE_SZ(header);
-  sz = gbe_prog->serializeToBin(ofs);
+  if(gen_pci_id){
+//add header to differeciate from llvm bitcode binary.
+// (5 bytes: 1 byte for binary type, 4 byte for bc code.)
+char header = '\0';
+OUTF_UPDATE_SZ(header);
+OUTF_UPDATE_SZ(header);
+OUTF_UPDATE_SZ(header);
+OUTF_UPDATE_SZ(header);
+OUTF_UPDATE_SZ(header);
+sz = gbe_prog->serializeToBin(ofs);
+  }else{
+char *llvm_binary;
+size_t bin_length = 
gbe_program_serialize_to_binary((gbe_program)gbe_prog, &llvm_binary, 1);
+ofs.write(llvm_binary, bin_length);
+sz+=bin_length;
+  }
 }
 
 ofs.close();
@@ -215,15 +232,20 @@ void program_build_instance::serialize_program(void) 
throw(int)
 
 void program_build_instance::build_program(void) throw(int)
 {
-// FIXME, we need to find a graceful way to generate internal binaries for 
difference
-// devices.
-gbe_program opaque = gbe_program_new_from_source(gen_pci_id, code, 0, 
build_opt.c_str(), NULL, NULL);
+gbe_program  opaque = NULL;
+if(gen_pci_id){
+  opaque = gbe_program_new_from_source(gen_pci_id, code, 0, 
build_opt.c_str(), NULL, NULL);
+}else{
+  opaque = gbe_program_compile_from_source(0, code, NULL, 0, 
build_opt.c_str(), NULL, NULL);
+}
 if (!opaque)
 throw FILE_BUILD_FAILED;
 
 gbe_prog = reinterpret_cast(opaque);
 
-assert(gbe_program_get_kernel_num(opaque));
+if(gen_pci_id){
+  assert(gbe_program_get_kernel_num(opaque));
+}
 }
 
 const char* program_build_instance::file_map_open(void) throw(int)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9b41932..45c83d4 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -5,30 +5,24 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/../include
 ${MESA_S

[Beignet] [PATCH v2] remove lspci, gbe_bin_genenrater would generator llvm binary by default.

2014-07-07 Thread xionghu . luo
From: Luo 

The driver can get the chipset id via ioctl instead of calling lspci in cmake;
users can generate a gen binary by configuring the cmake option
-DGEN_PCI_ID= or by calling gbe_bin_generater with the option -t
GEN_PCI_ID.

v2: add "\0GENC" magic code for gen binary, fix typo.

Signed-off-by: Luo 
---
 backend/src/gbe_bin_generater.cpp   | 72 -
 src/CMakeLists.txt  | 32 +++--
 src/GetGenID.sh | 26 --
 src/cl_program.c| 22 +---
 utests/CMakeLists.txt   | 17 ++---
 utests/enqueue_built_in_kernels.cpp |  1 -
 6 files changed, 90 insertions(+), 80 deletions(-)
 delete mode 100755 src/GetGenID.sh

diff --git a/backend/src/gbe_bin_generater.cpp 
b/backend/src/gbe_bin_generater.cpp
index 925ba93..d9ae946 100644
--- a/backend/src/gbe_bin_generater.cpp
+++ b/backend/src/gbe_bin_generater.cpp
@@ -156,21 +156,23 @@ void program_build_instance::serialize_program(void) 
throw(int)
 {
 ofstream ofs;
 ostringstream oss;
-size_t sz, header_sz = 0;
+size_t sz = 0, header_sz = 0;
 ofs.open(bin_path, ofstream::out | ofstream::trunc | ofstream::binary);
 
-//add header to differeciate from llvm bitcode binary.
-// (5 bytes: 1 byte for binary type, 4 byte for bc code.)
-char header = '\0';
-
 if (str_fmt_out) {
-  OUTS_UPDATE_SZ(header);
-  OUTS_UPDATE_SZ(header);
-  OUTS_UPDATE_SZ(header);
-  OUTS_UPDATE_SZ(header);
-  OUTS_UPDATE_SZ(header);
 
-  string array_name = "Unkown_name_array";
+  if(gen_pci_id){
+//add header to differeciate from llvm bitcode binary.
+// (5 bytes: 1 byte for binary type, 4 byte for bc code, 'GENC' is for 
gen binary.)
+char gen_header[6] = "\0GENC";
+OUTS_UPDATE_SZ(gen_header[0]);
+OUTS_UPDATE_SZ(gen_header[1]);
+OUTS_UPDATE_SZ(gen_header[2]);
+OUTS_UPDATE_SZ(gen_header[3]);
+OUTS_UPDATE_SZ(gen_header[4]);
+  }
+
+  string array_name = "Unknown_name_array";
   unsigned long last_slash = bin_path.rfind("/");
   unsigned long last_dot = bin_path.rfind(".");
 
@@ -180,9 +182,15 @@ void program_build_instance::serialize_program(void) 
throw(int)
   ofs << "#include " << "\n";
   ofs << "char " << array_name << "[] = {" << "\n";
 
-  sz = gbe_prog->serializeToBin(oss);
-
-  sz+=5;
+  if(gen_pci_id){
+sz = gbe_prog->serializeToBin(oss);
+sz += header_sz;
+  }else{
+char *llvm_binary;
+size_t bin_length = 
gbe_program_serialize_to_binary((gbe_program)gbe_prog, &llvm_binary, 1);
+oss.write(llvm_binary, bin_length);
+sz += bin_length;
+  }
 
   for (size_t i = 0; i < sz; i++) {
 unsigned char c = oss.str().c_str()[i];
@@ -191,18 +199,27 @@ void program_build_instance::serialize_program(void) 
throw(int)
 ofs << "0x";
 ofs << asic_str << ((i == sz - 1) ? "" : ", ");
   }
-
   ofs << "};\n";
 
   string array_size = array_name + "_size";
   ofs << "size_t " << array_size << " = " << sz << ";" << "\n";
 } else {
-  OUTF_UPDATE_SZ(header);
-  OUTF_UPDATE_SZ(header);
-  OUTF_UPDATE_SZ(header);
-  OUTF_UPDATE_SZ(header);
-  OUTF_UPDATE_SZ(header);
-  sz = gbe_prog->serializeToBin(ofs);
+  if(gen_pci_id){
+//add header to differeciate from llvm bitcode binary.
+// (5 bytes: 1 byte for binary type, 4 byte for bc code, 'GENC' is for 
gen binary.)
+char gen_header[6] = "\0GENC";
+OUTF_UPDATE_SZ(gen_header[0]);
+OUTF_UPDATE_SZ(gen_header[1]);
+OUTF_UPDATE_SZ(gen_header[2]);
+OUTF_UPDATE_SZ(gen_header[3]);
+OUTF_UPDATE_SZ(gen_header[4]);
+sz = gbe_prog->serializeToBin(ofs);
+  }else{
+char *llvm_binary;
+size_t bin_length = 
gbe_program_serialize_to_binary((gbe_program)gbe_prog, &llvm_binary, 1);
+ofs.write(llvm_binary, bin_length);
+sz+=bin_length;
+  }
 }
 
 ofs.close();
@@ -215,15 +232,20 @@ void program_build_instance::serialize_program(void) 
throw(int)
 
 void program_build_instance::build_program(void) throw(int)
 {
-// FIXME, we need to find a graceful way to generate internal binaries for 
difference
-// devices.
-gbe_program opaque = gbe_program_new_from_source(gen_pci_id, code, 0, 
build_opt.c_str(), NULL, NULL);
+gbe_program  opaque = NULL;
+if(gen_pci_id){
+  opaque = gbe_program_new_from_source(gen_pci_id, code, 0, 
build_opt.c_str(), NULL, NULL);
+}else{
+  opaque = gbe_program_compile_from_source(0, code, NULL, 0, 
build_opt.c_str(), NULL, NULL);
+}
 if (!opaque)
 throw FILE_BUILD_FAILED;
 
 gbe_prog = reinterpret_cast(opaque);
 
-assert(gbe_program_get_kernel_num(opaque));
+if(gen_pci_id){
+  assert(gbe_program_get_kernel_num(opaque));
+}
 }
 
 const char* program_build_instance::file_map_open

[Beignet] [PATCH 2/2] add utest load_program_from_gen_bin.

2014-07-08 Thread xionghu . luo
From: LuoXionghu 

This test case checks whether genProgramSerializeToBinary in the
backend can generate a gen binary correctly.

Rename load_program_from_bin to load_program_from_bin_file.
The difference is that load_program_from_bin_file can load a program
from either an llvm binary or a gen binary file generated by gbe_bin_generater.

Signed-off-by: LuoXionghu 
---
 utests/CMakeLists.txt |  3 +-
 utests/load_program_from_bin.cpp  | 77 -
 utests/load_program_from_bin_file.cpp | 77 +
 utests/load_program_from_gen_bin.cpp  | 93 +++
 4 files changed, 172 insertions(+), 78 deletions(-)
 delete mode 100644 utests/load_program_from_bin.cpp
 create mode 100644 utests/load_program_from_bin_file.cpp
 create mode 100644 utests/load_program_from_gen_bin.cpp

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 3614c57..561744d 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -171,7 +171,8 @@ set (utests_sources
   compiler_simd_any.cpp
   compiler_simd_all.cpp
   compiler_double_precision.cpp
-  load_program_from_bin.cpp
+  load_program_from_bin_file.cpp
+  load_program_from_gen_bin.cpp
   get_arg_info.cpp
   profiling_exec.cpp
   enqueue_copy_buf.cpp
diff --git a/utests/load_program_from_bin.cpp b/utests/load_program_from_bin.cpp
deleted file mode 100644
index d45c2bd..000
--- a/utests/load_program_from_bin.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-#include "utest_helper.hpp"
-#include "utest_file_map.hpp"
-#include 
-#include 
-
-using namespace std;
-
-static void cpu(int global_id, float *src, float *dst) {
-dst[global_id] = ceilf(src[global_id]);
-}
-
-static void test_load_program_from_bin(void)
-{
-const size_t n = 16;
-float cpu_dst[16], cpu_src[16];
-cl_int status;
-cl_int binary_status;
-char *ker_path = NULL;
-
-cl_file_map_t *fm = cl_file_map_new();
-ker_path = cl_do_kiss_path("compiler_ceil.bin", device);
-OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS);
-
-const unsigned char *src = (const unsigned char *)cl_file_map_begin(fm);
-const size_t sz = cl_file_map_size(fm);
-
-program = clCreateProgramWithBinary(ctx, 1,
-  &device, &sz, &src, &binary_status, &status);
-
-OCL_ASSERT(program && status == CL_SUCCESS);
-
-/* OCL requires to build the program even if it is created from a binary */
-OCL_ASSERT(clBuildProgram(program, 1, &device, NULL, NULL, NULL) == 
CL_SUCCESS);
-
-kernel = clCreateKernel(program, "compiler_ceil", &status);
-OCL_ASSERT(status == CL_SUCCESS);
-
-OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
-OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
-OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
-OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
-globals[0] = 16;
-locals[0] = 16;
-
-// Run random tests
-for (uint32_t pass = 0; pass < 8; ++pass) {
-OCL_MAP_BUFFER(0);
-for (int32_t i = 0; i < (int32_t) n; ++i)
-cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f;
-OCL_UNMAP_BUFFER(0);
-
-// Run the kernel on GPU
-OCL_NDRANGE(1);
-
-// Run on CPU
-for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
-
-// Compare
-OCL_MAP_BUFFER(1);
-
-#if 0
-printf(" GPU:\n");
-for (int32_t i = 0; i < (int32_t) n; ++i)
-printf(" %f", ((float *)buf_data[1])[i]);
-printf("\n CPU:\n");
-for (int32_t i = 0; i < (int32_t) n; ++i)
-printf(" %f", cpu_dst[i]);
-printf("\n");
-#endif
-
-for (int32_t i = 0; i < (int32_t) n; ++i)
-OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]);
-OCL_UNMAP_BUFFER(1);
-}
-}
-
-MAKE_UTEST_FROM_FUNCTION(test_load_program_from_bin);
diff --git a/utests/load_program_from_bin_file.cpp 
b/utests/load_program_from_bin_file.cpp
new file mode 100644
index 000..feefacc
--- /dev/null
+++ b/utests/load_program_from_bin_file.cpp
@@ -0,0 +1,77 @@
+#include "utest_helper.hpp"
+#include "utest_file_map.hpp"
+#include 
+#include 
+
+using namespace std;
+
+static void cpu(int global_id, float *src, float *dst) {
+dst[global_id] = ceilf(src[global_id]);
+}
+
+static void test_load_program_from_bin_file(void)
+{
+const size_t n = 16;
+float cpu_dst[16], cpu_src[16];
+cl_int status;
+cl_int binary_status;
+char *ker_path = NULL;
+
+cl_file_map_t *fm = cl_file_map_new();
+ker_path = cl_do_kiss_path("compiler_ceil.bin", device);
+OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS);
+
+const unsigned char *src = (const unsigned char *)cl_file_map_begin(fm);
+const size_t sz = cl_file_map_size(fm);
+
+program = clCreateProgramWithBinary(ctx, 1,
+  &device, &sz, &src, &binary_status, &status);
+
+OCL_ASSERT(program && status == CL_SUCCESS);
+
+/* OCL requi

[Beignet] [PATCH 0/2] gen binary with pci info.

2014-07-08 Thread xionghu . luo
From: LuoXionghu 

this patchset depends on the patch 
"remove lspci, gbe_bin_genenrater would generator llvm binary by default."

LuoXionghu (2):
  add pci info in the gen binary code.
  add utest load_program_from_gen_bin.

 backend/src/backend/gen_program.cpp   | 34 +
 backend/src/gbe_bin_generater.cpp |  2 +
 utests/CMakeLists.txt |  3 +-
 utests/load_program_from_bin.cpp  | 77 -
 utests/load_program_from_bin_file.cpp | 77 +
 utests/load_program_from_gen_bin.cpp  | 93 +++
 6 files changed, 199 insertions(+), 87 deletions(-)
 delete mode 100644 utests/load_program_from_bin.cpp
 create mode 100644 utests/load_program_from_bin_file.cpp
 create mode 100644 utests/load_program_from_gen_bin.cpp

-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/2] add pci info in the gen binary code.

2014-07-08 Thread xionghu . luo
From: LuoXionghu 

The pci info is 4 bytes long and is placed right after the '\0GENC' magic.
Check the header magic number and the pci info before deserializeFromBin.

Signed-off-by: LuoXionghu 
---
 backend/src/backend/gen_program.cpp | 34 +-
 backend/src/gbe_bin_generater.cpp   |  2 ++
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 84e8c2a..fc50344 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -196,15 +196,29 @@ namespace gbe {
 #endif
   }
 
+#define IS_GEN_BINARY(binary) (*binary == '\0' && *(binary+1) == 'G'&& 
*(binary+2) == 'E' &&*(binary+3) == 'N' &&*(binary+4) == 'C')
+#define BINARY_MATCH(typeA, typeB) ((IS_IVYBRIDGE(typeA) && 
IS_IVYBRIDGE(typeB)) || (IS_HASWELL(typeA) && IS_HASWELL(typeB)) )
+#define FILL_GEN_BINARY(binary) do{*binary = '\0'; *(binary+1) = 'G'; 
*(binary+2) = 'E'; *(binary+3) = 'N'; *(binary+4) = 'C';}while(0)
+#define FILL_DEVICE_ID(binary, devID) do {*(binary+5) = devID; *(binary+6) = 
devID >>8; *(binary+7) = devID >>16; *(binary+8) = devID >>24;}while(0)
+
   static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char 
*binary, size_t size) {
 using namespace gbe;
 std::string binary_content;
-//the first 5 bytes are header to differentiate from llvm bitcode binary.
-binary_content.assign(binary+5, size-5);
+//the header length is 9 bytes: 1 byte is binary type, 4 bytes are bitcode 
header, 4  bytes are device id info.
+uint32_t bin_deviceID = 
*(binary+5)|*(binary+6)<<8|*(binary+7)<<16|*(binary+8)<<24;
+
+// check whether is gen binary ('/0GENC')
+if(!IS_GEN_BINARY(binary)){
+return NULL;
+}
+// check the whether the current device ID match the binary file's.
+if(!BINARY_MATCH(bin_deviceID, deviceID)){
+  return NULL;
+}
+
+binary_content.assign(binary+9, size-9);
 GenProgram *program = GBE_NEW(GenProgram, deviceID);
 std::istringstream ifs(binary_content, std::ostringstream::binary);
-// FIXME we need to check the whether the current device ID match the 
binary file's.
-deviceID = deviceID;
 
 if (!program->deserializeFromBin(ifs)) {
   delete program;
@@ -255,11 +269,13 @@ namespace gbe {
   }
 
   //add header to differetiate from llvm bitcode binary.
-  //the header length is 5 bytes: 1 binary type, 4 bitcode header.
-  *binary = (char *)malloc(sizeof(char) * (sz+5) );
-  memset(*binary, 0, sizeof(char) * (sz+5) );
-  memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char));
-  return sz+5;
+  //the header length is 9 bytes: 1 byte is binary type, 4 bytes are 
bitcode header, 4  bytes are device id info.
+  *binary = (char *)malloc(sizeof(char) * (sz+9) );
+  memset(*binary, 0, sizeof(char) * (sz+9) );
+  FILL_GEN_BINARY(*binary);
+  FILL_DEVICE_ID(*binary, prog->deviceID);
+  memcpy(*binary+9, oss.str().c_str(), sz*sizeof(char));
+  return sz+9;
 }else{
 #ifdef GBE_COMPILER_AVAILABLE
   std::string str;
diff --git a/backend/src/gbe_bin_generater.cpp 
b/backend/src/gbe_bin_generater.cpp
index d9ae946..e7a5b97 100644
--- a/backend/src/gbe_bin_generater.cpp
+++ b/backend/src/gbe_bin_generater.cpp
@@ -170,6 +170,7 @@ void program_build_instance::serialize_program(void) 
throw(int)
 OUTS_UPDATE_SZ(gen_header[2]);
 OUTS_UPDATE_SZ(gen_header[3]);
 OUTS_UPDATE_SZ(gen_header[4]);
+OUTS_UPDATE_SZ(gen_pci_id);
   }
 
   string array_name = "Unknown_name_array";
@@ -213,6 +214,7 @@ void program_build_instance::serialize_program(void) 
throw(int)
 OUTF_UPDATE_SZ(gen_header[2]);
 OUTF_UPDATE_SZ(gen_header[3]);
 OUTF_UPDATE_SZ(gen_header[4]);
+OUTF_UPDATE_SZ(gen_pci_id);
 sz = gbe_prog->serializeToBin(ofs);
   }else{
 char *llvm_binary;
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [Patch V3 1/2] add platform info in the gen binary code.

2014-07-15 Thread xionghu . luo
From: LuoXionghu 

The platform info is 3 bytes long and is placed right after the '\0GENC' magic.
Check the header magic number and the platform info before deserializeFromBin.

v2: support IVB/BYT/HSW binaries on their own platforms, and allow a BYT binary to run on IVB.
v3: fix 'BYT' being overwritten by 'IVB'.

Signed-off-by: LuoXionghu 
---
 backend/src/backend/gen_program.cpp | 56 +++--
 backend/src/gbe_bin_generater.cpp   | 23 +++
 2 files changed, 70 insertions(+), 9 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 84e8c2a..c846786 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -196,15 +196,36 @@ namespace gbe {
 #endif
   }
 
+#define BINARY_HEADER_LENGTH 8
+#define IS_GEN_BINARY(binary) (*binary == '\0' && *(binary+1) == 'G'&& 
*(binary+2) == 'E' &&*(binary+3) == 'N' &&*(binary+4) == 'C')
+#define FILL_GEN_BINARY(binary) do{*binary = '\0'; *(binary+1) = 'G'; 
*(binary+2) = 'E'; *(binary+3) = 'N'; *(binary+4) = 'C';}while(0)
+#define FILL_DEVICE_ID(binary, src_hw_info) do {*(binary+5) = src_hw_info[0]; 
*(binary+6) = src_hw_info[1]; *(binary+7) = src_hw_info[2];}while(0)
+#define DEVICE_MATCH(typeA, src_hw_info) ((IS_IVYBRIDGE(typeA) && 
!strcmp(src_hw_info, "IVB")) ||  \
+  (IS_IVYBRIDGE(typeA) && 
!strcmp(src_hw_info, "BYT")) ||  \
+  (IS_BAYTRAIL_T(typeA) && 
!strcmp(src_hw_info, "BYT")) ||  \
+  (IS_HASWELL(typeA) && 
!strcmp(src_hw_info, "HSW")) )
+
   static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char 
*binary, size_t size) {
 using namespace gbe;
 std::string binary_content;
-//the first 5 bytes are header to differentiate from llvm bitcode binary.
-binary_content.assign(binary+5, size-5);
+//the header length is 8 bytes: 1 byte is binary type, 4 bytes are bitcode 
header, 3  bytes are hw info.
+char src_hw_info[4]="";
+src_hw_info[0] = *(binary+5);
+src_hw_info[1] = *(binary+6);
+src_hw_info[2] = *(binary+7);
+
+// check whether is gen binary ('/0GENC')
+if(!IS_GEN_BINARY(binary)){
+return NULL;
+}
+// check the whether the current device ID match the binary file's.
+if(!DEVICE_MATCH(deviceID, src_hw_info)){
+  return NULL;
+}
+
+binary_content.assign(binary+BINARY_HEADER_LENGTH, 
size-BINARY_HEADER_LENGTH);
 GenProgram *program = GBE_NEW(GenProgram, deviceID);
 std::istringstream ifs(binary_content, std::ostringstream::binary);
-// FIXME we need to check the whether the current device ID match the 
binary file's.
-deviceID = deviceID;
 
 if (!program->deserializeFromBin(ifs)) {
   delete program;
@@ -255,11 +276,28 @@ namespace gbe {
   }
 
   //add header to differetiate from llvm bitcode binary.
-  //the header length is 5 bytes: 1 binary type, 4 bitcode header.
-  *binary = (char *)malloc(sizeof(char) * (sz+5) );
-  memset(*binary, 0, sizeof(char) * (sz+5) );
-  memcpy(*binary+5, oss.str().c_str(), sz*sizeof(char));
-  return sz+5;
+  //the header length is 8 bytes: 1 byte is binary type, 4 bytes are 
bitcode header, 3  bytes are hw info.
+  *binary = (char *)malloc(sizeof(char) * (sz+BINARY_HEADER_LENGTH) );
+  memset(*binary, 0, sizeof(char) * (sz+BINARY_HEADER_LENGTH) );
+  FILL_GEN_BINARY(*binary);
+  char src_hw_info[4]="";
+  if(IS_IVYBRIDGE(prog->deviceID)){
+src_hw_info[0]='I';
+src_hw_info[1]='V';
+src_hw_info[2]='B';
+if(IS_BAYTRAIL_T(prog->deviceID)){
+  src_hw_info[0]='B';
+  src_hw_info[1]='Y';
+  src_hw_info[2]='T';
+}
+  }else if(IS_HASWELL(prog->deviceID)){
+src_hw_info[0]='H';
+src_hw_info[1]='S';
+src_hw_info[2]='W';
+  }
+  FILL_DEVICE_ID(*binary, src_hw_info);
+  memcpy(*binary+BINARY_HEADER_LENGTH, oss.str().c_str(), sz*sizeof(char));
+  return sz+BINARY_HEADER_LENGTH;
 }else{
 #ifdef GBE_COMPILER_AVAILABLE
   std::string str;
diff --git a/backend/src/gbe_bin_generater.cpp 
b/backend/src/gbe_bin_generater.cpp
index d9ae946..86c4406 100644
--- a/backend/src/gbe_bin_generater.cpp
+++ b/backend/src/gbe_bin_generater.cpp
@@ -39,6 +39,7 @@
 #include "backend/program.h"
 #include "backend/program.hpp"
 #include "backend/src/sys/platform.hpp"
+#include "src/cl_device_data.h"
 
 using namespace std;
 
@@ -159,6 +160,22 @@ void program_build_instance::serialize_program(void) 
throw(int)
 size_t sz = 0, header_sz = 0;
 ofs.open(bin_path, ofstream::out | ofstream::trunc | ofstream::binary);
 
+char src_hw_info[4]="";
+if(IS_IVYBRIDGE(gen_pci_id)){
+  src_hw_info[0]='I';
+  src_hw_info[1]='V';
+  src_hw_info[2]='B';
+  if(IS_BAYTRAIL_T(gen_pci_id)){
+src_hw_info[0]='B';
+src_hw_info[1]='Y';
+src_hw_info[2]='T';
+  }
+ 

[Beignet] [Patch V3 2/2] add utest load_program_from_gen_bin.

2014-07-15 Thread xionghu . luo
From: LuoXionghu 

This test case checks whether genProgramSerializeToBinary in the
backend can generate a gen binary correctly.

Rename load_program_from_bin to load_program_from_bin_file.
The difference is that load_program_from_bin_file can load a program
from either an llvm binary or a gen binary file generated by gbe_bin_generater.

Signed-off-by: LuoXionghu 
---
 utests/CMakeLists.txt |  3 +-
 utests/load_program_from_bin.cpp  | 77 -
 utests/load_program_from_bin_file.cpp | 77 +
 utests/load_program_from_gen_bin.cpp  | 93 +++
 4 files changed, 172 insertions(+), 78 deletions(-)
 delete mode 100644 utests/load_program_from_bin.cpp
 create mode 100644 utests/load_program_from_bin_file.cpp
 create mode 100644 utests/load_program_from_gen_bin.cpp

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 3614c57..561744d 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -171,7 +171,8 @@ set (utests_sources
   compiler_simd_any.cpp
   compiler_simd_all.cpp
   compiler_double_precision.cpp
-  load_program_from_bin.cpp
+  load_program_from_bin_file.cpp
+  load_program_from_gen_bin.cpp
   get_arg_info.cpp
   profiling_exec.cpp
   enqueue_copy_buf.cpp
diff --git a/utests/load_program_from_bin.cpp b/utests/load_program_from_bin.cpp
deleted file mode 100644
index d45c2bd..000
--- a/utests/load_program_from_bin.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-#include "utest_helper.hpp"
-#include "utest_file_map.hpp"
-#include 
-#include 
-
-using namespace std;
-
-static void cpu(int global_id, float *src, float *dst) {
-dst[global_id] = ceilf(src[global_id]);
-}
-
-static void test_load_program_from_bin(void)
-{
-const size_t n = 16;
-float cpu_dst[16], cpu_src[16];
-cl_int status;
-cl_int binary_status;
-char *ker_path = NULL;
-
-cl_file_map_t *fm = cl_file_map_new();
-ker_path = cl_do_kiss_path("compiler_ceil.bin", device);
-OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS);
-
-const unsigned char *src = (const unsigned char *)cl_file_map_begin(fm);
-const size_t sz = cl_file_map_size(fm);
-
-program = clCreateProgramWithBinary(ctx, 1,
-  &device, &sz, &src, &binary_status, &status);
-
-OCL_ASSERT(program && status == CL_SUCCESS);
-
-/* OCL requires to build the program even if it is created from a binary */
-OCL_ASSERT(clBuildProgram(program, 1, &device, NULL, NULL, NULL) == 
CL_SUCCESS);
-
-kernel = clCreateKernel(program, "compiler_ceil", &status);
-OCL_ASSERT(status == CL_SUCCESS);
-
-OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
-OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
-OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
-OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
-globals[0] = 16;
-locals[0] = 16;
-
-// Run random tests
-for (uint32_t pass = 0; pass < 8; ++pass) {
-OCL_MAP_BUFFER(0);
-for (int32_t i = 0; i < (int32_t) n; ++i)
-cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f;
-OCL_UNMAP_BUFFER(0);
-
-// Run the kernel on GPU
-OCL_NDRANGE(1);
-
-// Run on CPU
-for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
-
-// Compare
-OCL_MAP_BUFFER(1);
-
-#if 0
-printf(" GPU:\n");
-for (int32_t i = 0; i < (int32_t) n; ++i)
-printf(" %f", ((float *)buf_data[1])[i]);
-printf("\n CPU:\n");
-for (int32_t i = 0; i < (int32_t) n; ++i)
-printf(" %f", cpu_dst[i]);
-printf("\n");
-#endif
-
-for (int32_t i = 0; i < (int32_t) n; ++i)
-OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]);
-OCL_UNMAP_BUFFER(1);
-}
-}
-
-MAKE_UTEST_FROM_FUNCTION(test_load_program_from_bin);
diff --git a/utests/load_program_from_bin_file.cpp 
b/utests/load_program_from_bin_file.cpp
new file mode 100644
index 000..feefacc
--- /dev/null
+++ b/utests/load_program_from_bin_file.cpp
@@ -0,0 +1,77 @@
+#include "utest_helper.hpp"
+#include "utest_file_map.hpp"
+#include 
+#include 
+
+using namespace std;
+
+static void cpu(int global_id, float *src, float *dst) {
+dst[global_id] = ceilf(src[global_id]);
+}
+
+static void test_load_program_from_bin_file(void)
+{
+const size_t n = 16;
+float cpu_dst[16], cpu_src[16];
+cl_int status;
+cl_int binary_status;
+char *ker_path = NULL;
+
+cl_file_map_t *fm = cl_file_map_new();
+ker_path = cl_do_kiss_path("compiler_ceil.bin", device);
+OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS);
+
+const unsigned char *src = (const unsigned char *)cl_file_map_begin(fm);
+const size_t sz = cl_file_map_size(fm);
+
+program = clCreateProgramWithBinary(ctx, 1,
+  &device, &sz, &src, &binary_status, &status);
+
+OCL_ASSERT(program && status == CL_SUCCESS);
+
+/* OCL requi

[Beignet] [PATCH] improve the build performance of vector type built-in function.

2014-07-23 Thread xionghu . luo
From: LuoXionghu 

Expand the gentypen variants with a loop to reduce redundant inlining.

Signed-off-by: LuoXionghu 
---
 backend/src/gen_builtin_vector.py | 42 +--
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/backend/src/gen_builtin_vector.py 
b/backend/src/gen_builtin_vector.py
index b100bbf..83e2bcb 100755
--- a/backend/src/gen_builtin_vector.py
+++ b/backend/src/gen_builtin_vector.py
@@ -283,9 +283,39 @@ class builtinProto():
 formatStr += '{0} {1}param{2}'.format(ptype[0], pointerStr, n)
 
 formatStr += ')'
-formatStr = self.append(formatStr, '{{return 
({0}{1})('.format(vtype[0], vtype[1]))
-self.indent = len(formatStr)
-for j in range(0, vtype[1]):
+if self.functionName != 'select' and ptypeSeqs[0] == 
ptypeSeqs[self.paramCount-1]:
+formatStr += '\n{ \n  union{'
+formatStr = self.append(formatStr, '{0} 
va[{1}];'.format(vtype[0], vtype[1]))
+formatStr = self.append(formatStr, '{0}{1} 
vv{2};'.format(vtype[0], vtype[1], vtype[1]))
+formatStr += '\n  }uret;'
+formatStr += '\n  union{'
+formatStr = self.append(formatStr, '{0} 
pa[{1}];'.format(ptype[0], ptype[1]))
+formatStr = self.append(formatStr, '{0}{1} 
pv{2};'.format(ptype[0], ptype[1], ptype[1]))
+formatStr += '\n  }'
+for n in range(0, self.paramCount):
+  formatStr += 'usrc{0}'.format(n)
+  if n+1 != self.paramCount:
+formatStr +=', '
+formatStr += ';'
+
+for n in range(0, self.paramCount):
+  formatStr = self.append(formatStr, '  usrc{0}.pv{1} = 
param{2};'.format(n, ptype[1], n))
+formatStr = self.append(formatStr, '  for(int i =0; i < {0}; 
i++)'.format(ptype[1]))
+formatStr = self.append(formatStr, 'uret.va[i] = 
{0}('.format(self.functionName))
+
+for n in range(0, self.paramCount):
+  formatStr += 'usrc{0}.pa[i]'.format(n)
+  if n+1 != self.paramCount:
+formatStr +=', '
+formatStr += ');'
+formatStr = self.append(formatStr, ' return 
uret.vv{0};'.format(vtype[1]))
+formatStr += '\n}'
+formatStr = self.append(formatStr)
+return formatStr
+else:
+  formatStr = self.append(formatStr, '{{return 
({0}{1})('.format(vtype[0], vtype[1]))
+  self.indent = len(formatStr)
+  for j in range(0, vtype[1]):
 if (j != 0):
 formatStr += ','
 if (j + 1) % 2 == 0:
@@ -320,10 +350,10 @@ class builtinProto():
 
 formatStr += ')'
 
-formatStr += '); }\n'
-self.append(formatStr)
+  formatStr += '); }\n'
+  self.append(formatStr)
 
-return formatStr
+  return formatStr
 
 def output(self):
 for line in self.outputStr:
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH v2] improve the build performance of vector type built-in function.

2014-07-31 Thread xionghu . luo
From: LuoXionghu 

Expand the gentypen variants with a loop to reduce redundant inlining for
types with more than 4 components.

v2: add the greater-than-4-components condition to avoid performance
degradation.

Signed-off-by: Luo Xionghu 
---
 backend/src/gen_builtin_vector.py | 42 +--
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/backend/src/gen_builtin_vector.py 
b/backend/src/gen_builtin_vector.py
index b100bbf..15dbaf4 100755
--- a/backend/src/gen_builtin_vector.py
+++ b/backend/src/gen_builtin_vector.py
@@ -283,9 +283,39 @@ class builtinProto():
 formatStr += '{0} {1}param{2}'.format(ptype[0], pointerStr, n)
 
 formatStr += ')'
-formatStr = self.append(formatStr, '{{return 
({0}{1})('.format(vtype[0], vtype[1]))
-self.indent = len(formatStr)
-for j in range(0, vtype[1]):
+if self.functionName != 'select' and ptypeSeqs[0] == 
ptypeSeqs[self.paramCount-1] and ptype[1] > 4:
+formatStr += '\n{ \n  union{'
+formatStr = self.append(formatStr, '{0} 
va[{1}];'.format(vtype[0], vtype[1]))
+formatStr = self.append(formatStr, '{0}{1} 
vv{2};'.format(vtype[0], vtype[1], vtype[1]))
+formatStr += '\n  }uret;'
+formatStr += '\n  union{'
+formatStr = self.append(formatStr, '{0} 
pa[{1}];'.format(ptype[0], ptype[1]))
+formatStr = self.append(formatStr, '{0}{1} 
pv{2};'.format(ptype[0], ptype[1], ptype[1]))
+formatStr += '\n  }'
+for n in range(0, self.paramCount):
+  formatStr += 'usrc{0}'.format(n)
+  if n+1 != self.paramCount:
+formatStr +=', '
+formatStr += ';'
+
+for n in range(0, self.paramCount):
+  formatStr = self.append(formatStr, '  usrc{0}.pv{1} = 
param{2};'.format(n, ptype[1], n))
+formatStr = self.append(formatStr, '  for(int i =0; i < {0}; 
i++)'.format(ptype[1]))
+formatStr = self.append(formatStr, 'uret.va[i] = 
{0}('.format(self.functionName))
+
+for n in range(0, self.paramCount):
+  formatStr += 'usrc{0}.pa[i]'.format(n)
+  if n+1 != self.paramCount:
+formatStr +=', '
+formatStr += ');'
+formatStr = self.append(formatStr, ' return 
uret.vv{0};'.format(vtype[1]))
+formatStr += '\n}'
+formatStr = self.append(formatStr)
+return formatStr
+else:
+  formatStr = self.append(formatStr, '{{return 
({0}{1})('.format(vtype[0], vtype[1]))
+  self.indent = len(formatStr)
+  for j in range(0, vtype[1]):
 if (j != 0):
 formatStr += ','
 if (j + 1) % 2 == 0:
@@ -320,10 +350,10 @@ class builtinProto():
 
 formatStr += ')'
 
-formatStr += '); }\n'
-self.append(formatStr)
+  formatStr += '); }\n'
+  self.append(formatStr)
 
-return formatStr
+  return formatStr
 
 def output(self):
 for line in self.outputStr:
-- 
1.8.1.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] fix the relational built-in vector function regression.

2014-08-05 Thread xionghu . luo
From: Luo Xionghu 

The relational vector functions need to return -1 instead of 1 according to
the spec.

Signed-off-by: Luo Xionghu 
---
 backend/src/gen_builtin_vector.py | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/backend/src/gen_builtin_vector.py 
b/backend/src/gen_builtin_vector.py
index 15dbaf4..5f1c4b7 100755
--- a/backend/src/gen_builtin_vector.py
+++ b/backend/src/gen_builtin_vector.py
@@ -301,7 +301,10 @@ class builtinProto():
 for n in range(0, self.paramCount):
   formatStr = self.append(formatStr, '  usrc{0}.pv{1} = 
param{2};'.format(n, ptype[1], n))
 formatStr = self.append(formatStr, '  for(int i =0; i < {0}; 
i++)'.format(ptype[1]))
-formatStr = self.append(formatStr, 'uret.va[i] = 
{0}('.format(self.functionName))
+formatStr += '\nuret.va[i] = '
+if self.prefix == 'relational' and self.functionName != 
'bitselect' and self.functionName != 'select':
+  formatStr += '-'
+formatStr += '{0}('.format(self.functionName)
 
 for n in range(0, self.paramCount):
   formatStr += 'usrc{0}.pa[i]'.format(n)
-- 
1.8.3.2

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] fix opencv_test_imgproc subcase OCL_ImgProc/Accumulate.Mask regression.

2014-08-25 Thread xionghu . luo
From: Luo Xionghu 

This regression is caused by the structural analysis when checking the if-then
node; there are actually four types of if-then node according to the
topology and fallthrough information. A fallthrough check is added in this
patch.

Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen_insn_selection.cpp |4 +++-
 backend/src/ir/function.hpp|5 +
 backend/src/ir/structural_analysis.cpp |9 -
 backend/src/ir/structural_analysis.hpp |   16 +++-
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index b7a39af..9a552b1 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -4018,7 +4018,9 @@ namespace gbe
   sel.curr.physicalFlag = 0;
   sel.curr.flagIndex = (uint64_t)pred;
   sel.curr.externFlag = 1;
-  sel.curr.inversePredicate = 1;
+  if(insn.getParent()->need_reverse ){
+sel.curr.inversePredicate = 1;
+  }
   sel.curr.predicate = GEN_PREDICATE_NORMAL;
   sel.IF(GenRegister::immd(0), jip, uip);
   sel.curr.inversePredicate = 0;
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index c5582b4..b877bce 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -87,6 +87,11 @@ namespace ir {
 set  definedPhiRegs;
   /* these three are used by structure transforming */
   public:
+/*if need_reverse is true,  need to reverse prediction.
+ *if condition is TRUE, IF instruction will execute the following block,
+ * different from BRA instruction, so all the IF instruction need_reverse
+ * except two special case(fallthrough is the same with succs.). */
+bool need_reverse;
 /* if needEndif is true, it means that this bb is the exit of an
  * outermost structure, so this block needs another endif to match
  * the if inserted at the entry of this structure, otherwise this
diff --git a/backend/src/ir/structural_analysis.cpp 
b/backend/src/ir/structural_analysis.cpp
index dfc2118..c106fa7 100644
--- a/backend/src/ir/structural_analysis.cpp
+++ b/backend/src/ir/structural_analysis.cpp
@@ -120,6 +120,7 @@ namespace analysis
 /* since this node is an if node, so we remove the BRA instruction at the 
bottom of the exit BB of 'node',
  * and insert IF instead
  */
+pbb->need_reverse = node->need_reverse;
 pbb->erase(it);
 ir::Instruction insn = ir::IF(matchingElseLabel, reg);
 ir::Instruction* p_new_insn = pbb->getParent().newInstruction(insn);
@@ -724,7 +725,7 @@ namespace analysis
   n = *(++(node->succs().begin()));
 
   /* check for if node then n */
-  if(n->succs().size() == 1 &&
+  if( n->succs().size() == 1 &&
  n->preds().size() == 1 &&
  *(n->succs().begin()) == m &&
  !n->hasBarrier() && !node->hasBarrier())
@@ -734,6 +735,9 @@ namespace analysis
 nset.insert(n);
 
 Node* p = new IfThenNode(node, n);
+if(node->fallthrough() == m){
+  node->need_reverse = false;
+}
 
 if(node->canBeHandled == false || n->canBeHandled == false)
   p->canBeHandled = false;
@@ -752,6 +756,9 @@ namespace analysis
 nset.insert(m);
 
 Node* p = new IfThenNode(node, m);
+if(node->fallthrough() == n){
+  node->need_reverse = false;
+}
 
 if(node->canBeHandled == false || m->canBeHandled == false)
   p->canBeHandled = false;
diff --git a/backend/src/ir/structural_analysis.hpp 
b/backend/src/ir/structural_analysis.hpp
index 06c2f5f..f7a34d1 100644
--- a/backend/src/ir/structural_analysis.hpp
+++ b/backend/src/ir/structural_analysis.hpp
@@ -87,7 +87,7 @@ namespace analysis
   class Node
   {
   public:
-Node(RegionType rtype, const NodeList& children): has_barrier(false), 
mark(false), canBeHandled(true)
+Node(RegionType rtype, const NodeList& children): has_barrier(false), 
mark(false), canBeHandled(true), need_reverse(true)
 {
   this->rtype = rtype;
   this->children = children;
@@ -118,6 +118,20 @@ namespace analysis
 bool canBeHandled;
 //label is for debug
 int label;
+/* need_reverse should be false under two circumstance,
+ * fallthrough is the same with succs:
+ * (1) n->succs == m && node->fallthrough == m
+ * node
+ * | \
+ * |  \
+ * m<--n
+ * (2) m->succs == n && node->fallthrough == n
+ * node
+ * | \
+ * |  \
+ * m-->n
+ * */
+bool need_reverse;
   };
 
   /* represents basic block */
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH v2] fix opencv_test_imgproc subcase OCL_ImgProc/Accumulate.Mask regression.

2014-08-26 Thread xionghu . luo
From: Luo Xionghu 

This regression is caused by the structural analysis when checking the if-then
node; there are actually four types of if-then node according to the
topology and fallthrough information. A fallthrough check is added in this
patch.

v2: add an inversePredicate member and accessor function for BranchInstruction;
print the exact meaning of the IF instruction in GEN_IR.

Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen_insn_selection.cpp |2 +-
 backend/src/ir/instruction.cpp |   12 +---
 backend/src/ir/instruction.hpp |4 +++-
 backend/src/ir/structural_analysis.cpp |   10 --
 backend/src/ir/structural_analysis.hpp |   16 +++-
 5 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index b7a39af..170a9d8 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -4018,7 +4018,7 @@ namespace gbe
   sel.curr.physicalFlag = 0;
   sel.curr.flagIndex = (uint64_t)pred;
   sel.curr.externFlag = 1;
-  sel.curr.inversePredicate = 1;
+  sel.curr.inversePredicate = insn.getInversePredicated();
   sel.curr.predicate = GEN_PREDICATE_NORMAL;
   sel.IF(GenRegister::immd(0), jip, uip);
   sel.curr.inversePredicate = 0;
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index bfb2000..370fb87 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -348,13 +348,14 @@ namespace ir {
   public NDstPolicy
 {
 public:
-  INLINE BranchInstruction(Opcode op, LabelIndex labelIndex, Register 
predicate) {
+  INLINE BranchInstruction(Opcode op, LabelIndex labelIndex, Register 
predicate, bool inv_pred=false) {
 GBE_ASSERT(op == OP_BRA || op == OP_IF);
 this->opcode = op;
 this->predicate = predicate;
 this->labelIndex = labelIndex;
 this->hasPredicate = true;
 this->hasLabel = true;
+this->inversePredicate = inv_pred;
   }
   INLINE BranchInstruction(Opcode op, LabelIndex labelIndex) {
 GBE_ASSERT(op == OP_BRA || op == OP_ELSE || op == OP_ENDIF);
@@ -385,11 +386,13 @@ namespace ir {
 predicate = reg;
   }
   INLINE bool isPredicated(void) const { return hasPredicate; }
+  INLINE bool getInversePredicated(void) const { return inversePredicate; }
   INLINE bool wellFormed(const Function &fn, std::string &why) const;
   INLINE void out(std::ostream &out, const Function &fn) const;
   Register predicate;//!< Predication means conditional branch
   LabelIndex labelIndex; //!< Index of the label the branch targets
   bool hasPredicate:1;   //!< Is it predicated?
+  bool inversePredicate:1;   //!< Is it inverse predicated?
   bool hasLabel:1;   //!< Is there any target label?
   Register dst[0];   //!< No destination
 };
@@ -1142,6 +1145,8 @@ namespace ir {
 
 INLINE void BranchInstruction::out(std::ostream &out, const Function &fn) 
const {
   this->outOpcode(out);
+  if(opcode == OP_IF && inversePredicate)
+out << " !";
   if (hasPredicate)
 out << "<%" << this->getSrc(fn, 0) << ">";
   if (hasLabel) out << " -> label$" << labelIndex;
@@ -1463,6 +1468,7 @@ DECL_MEM_FN(LoadInstruction, bool, isAligned(void), 
isAligned())
 DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType())
 DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
 DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
+DECL_MEM_FN(BranchInstruction, bool, getInversePredicated(void), 
getInversePredicated())
 DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), 
getLabelIndex())
 DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())
 DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType())
@@ -1615,8 +1621,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, 
getImageIndex(void), getImageIndex
   }
 
   // IF
-  Instruction IF(LabelIndex labelIndex, Register pred) {
-return internal::BranchInstruction(OP_IF, labelIndex, pred).convert();
+  Instruction IF(LabelIndex labelIndex, Register pred, bool inv_pred) {
+return internal::BranchInstruction(OP_IF, labelIndex, pred, 
inv_pred).convert();
   }
 
   // ELSE
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index e245638..39fb2db 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -430,6 +430,8 @@ namespace ir {
   public:
 /*! Indicate if the branch is predicated */
 bool isPredicated(void) const;
+/*! Indicate if the branch is inverse predicated */
+bool getInversePredicated(void) const;
 /*! Return the predicate register (if predicated) */
 RegisterData getPredicate(void) const {
   GBE_ASSERTM(this->isPredicated(

[Beignet] [PATCH] fix piglit cl-api-get-program-info fail.

2014-09-08 Thread xionghu . luo
From: Luo Xionghu 

add pointer check.

Signed-off-by: Luo Xionghu 
---
 src/cl_program.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cl_program.c b/src/cl_program.c
index c141a35..a745c00 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -743,7 +743,7 @@ cl_program_get_kernel_names(cl_program p, size_t size, char 
*names, size_t *size
   int i = 0;
   const char *ker_name = NULL;
   size_t len = 0;
-  *size_ret = 0;
+  if(size_ret) *size_ret = 0;
 
   if(p->ker == NULL) {
 return;
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] fix piglit clGetKernelWorkGroupInfo fail.

2014-09-08 Thread xionghu . luo
From: Luo Xionghu 

add CL_KERNEL_GLOBAL_WORK_SIZE option for clGetKernelWorkGroupInfo.

Signed-off-by: Luo Xionghu 
---
 src/cl_api.c   |3 +++
 src/cl_device_id.c |1 +
 src/cl_kernel.h|2 ++
 3 files changed, 6 insertions(+)

diff --git a/src/cl_api.c b/src/cl_api.c
index 2370dc0..07e8954 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2965,6 +2965,9 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
 }
   }
 
+  for (i = 0; i < work_dim; ++i)
+kernel->global_work_sz[i] = fixed_global_sz[i];
+
   /* Do device specific checks are enqueue the kernel */
   err = cl_command_queue_ND_range(command_queue,
   kernel,
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index a0f0c99..6bd80a6 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -573,6 +573,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
 }
 DECL_FIELD(COMPILE_WORK_GROUP_SIZE, kernel->compile_wg_sz)
 DECL_FIELD(PRIVATE_MEM_SIZE, kernel->stack_size)
+DECL_FIELD(GLOBAL_WORK_SIZE, kernel->global_work_sz)
 default:
   return CL_INVALID_VALUE;
   };
diff --git a/src/cl_kernel.h b/src/cl_kernel.h
index f4ed8d3..85a997d 100644
--- a/src/cl_kernel.h
+++ b/src/cl_kernel.h
@@ -59,6 +59,8 @@ struct _cl_kernel {
   cl_ulong local_mem_sz;  /* local memory size specified in kernel args. */
   size_t compile_wg_sz[3];/* Required workgroup size by 
__attribute__((reqd_work_gro
  up_size(X, Y, Z))) qualifier.*/
+  size_t global_work_sz[3];/* maximum global size that can be used to 
execute a kernel
+(i.e. global_work_size argument to 
clEnqueueNDRangeKernel.)*/
   size_t stack_size;  /* stack size per work item. */
   cl_argument *args;  /* To track argument setting */
   uint32_t arg_n:31;  /* Number of arguments */
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] fix clGetKernelWorkGroupInfo built-in kernel fail.

2014-09-10 Thread xionghu . luo
From: Luo Xionghu 

add CL_KERNEL_GLOBAL_WORK_SIZE option for clGetKernelWorkGroupInfo.

v2: should return the max global work size instead of the current work size.
This function needs to return CL_INVALID_VALUE if the device is not a custom
device or the kernel is not a built-in kernel.
We have 3 kinds of built-in kernels for 1d/2d/3d memories; the max global
work size is decided by the dimension and memory type.
The piglit failure is caused by calling NON built-in kernels, so a patch needs
to be sent to piglit later.

Signed-off-by: Luo Xionghu 
---
 src/cl_device_id.c|   34 +
 src/cl_device_id.h|3 +++
 src/cl_gt_device.h|3 +++
 src/cl_kernel.h   |2 ++
 utests/CMakeLists.txt |1 +
 utests/builtin_kernel_max_global_size.cpp |   30 +
 6 files changed, 73 insertions(+)
 create mode 100644 utests/builtin_kernel_max_global_size.cpp

diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index a0f0c99..5b24fcb 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -515,6 +515,22 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
 
 #include "cl_kernel.h"
 #include "cl_program.h"
+LOCAL int
+cl_check_builtin_kernel_dimension(cl_kernel kernel, cl_device_id device)
+{
+  const char * n = cl_kernel_get_name(kernel);
+  const char * builtin_kernels_2d = 
"__cl_copy_image_2d_to_2d;__cl_copy_image_2d_to_buffer;__cl_copy_buffer_to_image_2d;__cl_fill_image_2d;__cl_fill_image_2d_array;";
+  const char * builtin_kernels_3d = 
"__cl_copy_image_3d_to_2d;__cl_copy_image_2d_to_3d;__cl_copy_image_3d_to_3d;__cl_copy_image_3d_to_buffer;__cl_copy_buffer_to_image_3d;__cl_fill_image_3d";
+if (!strstr(device->built_in_kernels, n)){
+  return 0;
+}else if(strstr(builtin_kernels_2d, n)){
+  return 2;
+}else if(strstr(builtin_kernels_3d, n)){
+  return 3;
+}else
+  return 1;
+
+}
 
 LOCAL size_t
 cl_get_kernel_max_wg_sz(cl_kernel kernel)
@@ -543,6 +559,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
  size_t* param_value_size_ret)
 {
   int err = CL_SUCCESS;
+  int dimension = 0;
   if (UNLIKELY(device != &intel_ivb_gt1_device &&
device != &intel_ivb_gt2_device &&
device != &intel_baytrail_t_device &&
@@ -573,6 +590,23 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
 }
 DECL_FIELD(COMPILE_WORK_GROUP_SIZE, kernel->compile_wg_sz)
 DECL_FIELD(PRIVATE_MEM_SIZE, kernel->stack_size)
+case CL_KERNEL_GLOBAL_WORK_SIZE:
+  dimension = cl_check_builtin_kernel_dimension(kernel, device);
+  if ( !dimension ) return CL_INVALID_VALUE;
+  if (param_value_size_ret != NULL)
+*param_value_size_ret = sizeof(device->max_1d_global_work_sizes);
+  if (param_value) {
+if (dimension == 1) {
+  memcpy(param_value, device->max_1d_global_work_sizes, 
sizeof(device->max_1d_global_work_sizes));
+}else if(dimension == 2){
+  memcpy(param_value, device->max_2d_global_work_sizes, 
sizeof(device->max_2d_global_work_sizes));
+}else if(dimension == 3){
+  memcpy(param_value, device->max_3d_global_work_sizes, 
sizeof(device->max_3d_global_work_sizes));
+}else
+  return CL_INVALID_VALUE;
+
+return CL_SUCCESS;
+  }
 default:
   return CL_INVALID_VALUE;
   };
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index c4f8227..31bce47 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -30,6 +30,9 @@ struct _cl_device_id {
   cl_uint  max_work_item_dimensions;   // should be 3.
   size_t   max_work_item_sizes[3]; // equal to maximum work group size.
   size_t   max_work_group_size;// maximum work group size under 
simd16 mode.
+  size_t   max_1d_global_work_sizes[3];   // maximum 1d global work size 
for builtin kernels.
+  size_t   max_2d_global_work_sizes[3];   // maximum 2d global work size 
for builtin kernels.
+  size_t   max_3d_global_work_sizes[3];   // maximum 3d global work size 
for builtin kernels.
   cl_uint  preferred_vector_width_char;
   cl_uint  preferred_vector_width_short;
   cl_uint  preferred_vector_width_int;
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index 33ef1f0..3cd54eb 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -21,6 +21,9 @@
 .device_type = CL_DEVICE_TYPE_GPU,
 .vendor_id = 0, /* == device_id (set when requested) */
 .max_work_item_dimensions = 3,
+.max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1},
+.max_2d_global_work_sizes = {8192, 8192, 1},
+.max_3d_global_work_sizes = {8192, 8192, 2048},
 .preferred_vector_width_char = 8,
 .preferred_vector_width_short = 8,
 .preferred_vector_width_int = 4,
diff --git a/src/cl_kernel.h b/src/cl_kernel.h
index f4ed8d3..85a997d 100644
--- a/src/cl_kernel.h
+++ b/src/cl_kernel.h
@@ -59,6 +59,8 @@ struct _cl_kernel {
   cl_ulong local_mem_sz;  /* local 

[Beignet] [PATCH] fix piglit cl-api-set-kernel-arg fail.

2014-09-10 Thread xionghu . luo
From: Luo Xionghu 

The memory object should be checked for validity against the context's buffers
before being set as a kernel argument.

Signed-off-by: Luo Xionghu 
---
 src/cl_kernel.c |4 
 src/cl_mem.c|   12 
 src/cl_mem.h|3 +++
 3 files changed, 19 insertions(+)

diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 5ab9c55..8eec907 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -99,6 +99,7 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, 
const void *value)
   enum gbe_arg_type arg_type; /* kind of argument */
   size_t arg_sz;  /* size of the argument */
   cl_mem mem = NULL;  /* for __global, __constant and image arguments 
*/
+  cl_context ctx = k->program->ctx;
 
   if (UNLIKELY(index >= k->arg_n))
 return CL_INVALID_ARG_INDEX;
@@ -136,6 +137,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, 
const void *value)
 if(value != NULL)
   mem = *(cl_mem*)value;
 if(value != NULL && mem) {
+  if (!mem_in_buffers(mem, ctx->buffers))
+return CL_INVALID_ARG_VALUE;
+
   if (UNLIKELY(mem->magic != CL_MAGIC_MEM_HEADER))
 return CL_INVALID_MEM_OBJECT;
 
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 11411d9..d4bbe2c 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -289,6 +289,18 @@ error:
 
 }
 
+LOCAL cl_bool
+mem_in_buffers(cl_mem mem, cl_mem buffers)
+{
+  cl_mem tmp = buffers;
+  while(tmp){
+if(mem == tmp)
+  return CL_TRUE;
+tmp = tmp->next;
+  }
+  return CL_FALSE;
+}
+
 LOCAL cl_mem
 cl_mem_new_buffer(cl_context ctx,
   cl_mem_flags flags,
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 57f38f1..3bcad18 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -177,6 +177,9 @@ extern cl_int cl_get_mem_object_info(cl_mem, cl_mem_info, 
size_t, void *, size_t
 /* Query information about an image */
 extern cl_int cl_get_image_info(cl_mem, cl_image_info, size_t, void *, size_t 
*);
 
+/* Query whether mem is in buffers */
+extern cl_bool mem_in_buffers(cl_mem mem, cl_mem buffers);
+
 /* Create a new memory object and initialize it with possible user data */
 extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, 
cl_int*);
 
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] fix piglit cl-api-set-kernel-arg fail.

2014-09-10 Thread xionghu . luo
From: Luo Xionghu 

The memory object should be checked for validity against the context's buffers
before being set as a kernel argument.

v2: rename the function from mem_in_buffers to is_valid_mem, move the
magic header check into it.

Signed-off-by: Luo Xionghu 
---
 src/cl_kernel.c |3 ++-
 src/cl_mem.c|   15 +++
 src/cl_mem.h|3 +++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 5ab9c55..d7c2f7c 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -99,6 +99,7 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, 
const void *value)
   enum gbe_arg_type arg_type; /* kind of argument */
   size_t arg_sz;  /* size of the argument */
   cl_mem mem = NULL;  /* for __global, __constant and image arguments 
*/
+  cl_context ctx = k->program->ctx;
 
   if (UNLIKELY(index >= k->arg_n))
 return CL_INVALID_ARG_INDEX;
@@ -136,7 +137,7 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, 
const void *value)
 if(value != NULL)
   mem = *(cl_mem*)value;
 if(value != NULL && mem) {
-  if (UNLIKELY(mem->magic != CL_MAGIC_MEM_HEADER))
+  if( CL_SUCCESS != is_valid_mem(mem, ctx->buffers))
 return CL_INVALID_MEM_OBJECT;
 
   if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !IS_IMAGE(mem))
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 11411d9..077f1d7 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -289,6 +289,21 @@ error:
 
 }
 
+LOCAL cl_int
+is_valid_mem(cl_mem mem, cl_mem buffers)
+{
+  cl_mem tmp = buffers;
+  while(tmp){
+if(mem == tmp){
+  if (UNLIKELY(mem->magic != CL_MAGIC_MEM_HEADER))
+return CL_INVALID_MEM_OBJECT;
+  return CL_SUCCESS;
+}
+tmp = tmp->next;
+  }
+  return CL_INVALID_MEM_OBJECT;
+}
+
 LOCAL cl_mem
 cl_mem_new_buffer(cl_context ctx,
   cl_mem_flags flags,
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 57f38f1..0ccbb5d 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -177,6 +177,9 @@ extern cl_int cl_get_mem_object_info(cl_mem, cl_mem_info, 
size_t, void *, size_t
 /* Query information about an image */
 extern cl_int cl_get_image_info(cl_mem, cl_image_info, size_t, void *, size_t 
*);
 
+/* Query whether mem is in buffers */
+extern cl_int is_valid_mem(cl_mem mem, cl_mem buffers);
+
 /* Create a new memory object and initialize it with possible user data */
 extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, 
cl_int*);
 
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] fix piglit get kernel info FUNCTION ATTRIBUTE fail.

2014-09-12 Thread xionghu . luo
From: Luo 

The backend needs to return the kernel FUNCTION ATTRIBUTE information to
clGetKernelInfo.
There are 3 kinds of function attributes so far; the vec_type_hint parameter
cannot be returned because LLVM lacks such info.

Signed-off-by: Luo 
---
 backend/src/backend/program.cpp |9 +++
 backend/src/backend/program.h   |4 +++
 backend/src/backend/program.hpp |6 +
 backend/src/gbe_bin_interpreter.cpp |1 +
 backend/src/ir/function.hpp |5 
 backend/src/llvm/llvm_gen_backend.cpp   |   45 +++
 src/cl_api.c|3 +++
 src/cl_gbe_loader.cpp   |5 
 src/cl_gbe_loader.h |1 +
 src/cl_kernel.c |7 +
 src/cl_kernel.h |3 +++
 utests/CMakeLists.txt   |1 +
 utests/compiler_function_qualifiers.cpp |   10 +++
 13 files changed, 100 insertions(+)

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index be83108..2308770 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -154,6 +154,7 @@ namespace gbe {
   kernel->setImageSet(pair.second->getImageSet());
   kernel->setPrintfSet(pair.second->getPrintfSet());
   kernel->setCompileWorkGroupSize(pair.second->getCompileWorkGroupSize());
+  kernel->setFunctionAttributes(pair.second->getFunctionAttributes());
   kernels.insert(std::make_pair(name, kernel));
 }
 return true;
@@ -895,6 +896,12 @@ namespace gbe {
 return kernel->getName();
   }
 
+  static const char *kernelGetAttributes(gbe_kernel genKernel) {
+if (genKernel == NULL) return NULL;
+const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+return kernel->getFunctionAttributes();
+  }
+
   static const char *kernelGetCode(gbe_kernel genKernel) {
 if (genKernel == NULL) return NULL;
 const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
@@ -,6 +1118,7 @@ GBE_EXPORT_SYMBOL gbe_program_get_kernel_num_cb 
*gbe_program_get_kernel_num = NU
 GBE_EXPORT_SYMBOL gbe_program_get_kernel_by_name_cb 
*gbe_program_get_kernel_by_name = NULL;
 GBE_EXPORT_SYMBOL gbe_program_get_kernel_cb *gbe_program_get_kernel = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_name_cb *gbe_kernel_get_name = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_attributes_cb *gbe_kernel_get_attributes = 
NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_code_cb *gbe_kernel_get_code = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_code_size_cb *gbe_kernel_get_code_size = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_arg_num_cb *gbe_kernel_get_arg_num = NULL;
@@ -1158,6 +1166,7 @@ namespace gbe
   gbe_program_get_kernel_by_name = gbe::programGetKernelByName;
   gbe_program_get_kernel = gbe::programGetKernel;
   gbe_kernel_get_name = gbe::kernelGetName;
+  gbe_kernel_get_attributes = gbe::kernelGetAttributes;
   gbe_kernel_get_code = gbe::kernelGetCode;
   gbe_kernel_get_code_size = gbe::kernelGetCodeSize;
   gbe_kernel_get_arg_num = gbe::kernelGetArgNum;
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index c63ae6a..0e773f4 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -271,6 +271,10 @@ extern gbe_program_get_kernel_cb *gbe_program_get_kernel;
 typedef const char *(gbe_kernel_get_name_cb)(gbe_kernel);
 extern gbe_kernel_get_name_cb *gbe_kernel_get_name;
 
+/*! Get the kernel attributes*/
+typedef const char *(gbe_kernel_get_attributes_cb)(gbe_kernel);
+extern gbe_kernel_get_attributes_cb *gbe_kernel_get_attributes;
+
 /*! Get the kernel source code */
 typedef const char *(gbe_kernel_get_code_cb)(gbe_kernel);
 extern gbe_kernel_get_code_cb *gbe_kernel_get_code;
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index 6a8af61..4f9b68a 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -176,6 +176,11 @@ namespace gbe {
wg_sz[1] = compileWgSize[1];
wg_sz[2] = compileWgSize[2];
 }
+/*! Set function attributes string. */
+void setFunctionAttributes(const std::string& functionAttributes) {  
this->functionAttributes= functionAttributes; }
+/*! Get function attributes string. */
+const char* getFunctionAttributes(void) const {return 
this->functionAttributes.c_str();}
+
 /*! Get defined image size */
 size_t getImageSize(void) const { return (imageSet == NULL ? 0 : 
imageSet->getDataSize()); }
 /*! Get defined image value array */
@@ -228,6 +233,7 @@ namespace gbe {
 ir::ImageSet *imageSet;//!< Copy from the corresponding function.
 ir::PrintfSet *printfSet;  //!< Copy from the corresponding function.
 size_t compileWgSize[3];   //!< required work group size by kernel 
attribute.
+std::string functionAttributes; //!< function attribute qualifiers 
combined.
 GBE_CLASS(Kernel); //!< Use custom all

[Beignet] [PATCH] fix bin/cl-program-tester tests/cl/program/execute/attributes.cl regression.

2014-09-14 Thread xionghu . luo
From: Luo Xionghu 

work_group_size_hint should be stored in its own variable instead of reusing reqd_wg_sz.

Signed-off-by: Luo Xionghu 
---
 backend/src/llvm/llvm_gen_backend.cpp |   13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/backend/src/llvm/llvm_gen_backend.cpp 
b/backend/src/llvm/llvm_gen_backend.cpp
index b0e02ca..918af24 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1249,6 +1249,7 @@ namespace gbe
 // Loop over the kernel metadatas to set the required work group size.
 NamedMDNode *clKernelMetaDatas = 
TheModule->getNamedMetadata("opencl.kernels");
 size_t reqd_wg_sz[3] = {0, 0, 0};
+size_t hint_wg_sz[3] = {0, 0, 0};
 ir::FunctionArgument::InfoFromLLVM llvmInfo;
 MDNode *node = NULL;
 MDNode *addrSpaceNode = NULL;
@@ -1320,18 +1321,18 @@ namespace gbe
 ConstantInt *y = dyn_cast(attrNode->getOperand(2));
 ConstantInt *z = dyn_cast(attrNode->getOperand(3));
 GBE_ASSERT(x && y && z);
-reqd_wg_sz[0] = x->getZExtValue();
-reqd_wg_sz[1] = y->getZExtValue();
-reqd_wg_sz[2] = z->getZExtValue();
+hint_wg_sz[0] = x->getZExtValue();
+hint_wg_sz[1] = y->getZExtValue();
+hint_wg_sz[2] = z->getZExtValue();
 functionAttributes += attrName->getString();
 std::stringstream param;
 char buffer[100];
 param <<"(";
-param << reqd_wg_sz[0];
+param << hint_wg_sz[0];
 param << ",";
-param << reqd_wg_sz[1];
+param << hint_wg_sz[1];
 param << ",";
-param << reqd_wg_sz[2];
+param << hint_wg_sz[2];
 param <<")";
 param >> buffer;
 functionAttributes += buffer;
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 0/3] SelfLoop enable.

2014-09-14 Thread xionghu . luo
From: Luo Xionghu 

This patchset enables the self-loop with the "WHILE" instruction.
A regression is introduced in the utest case builtin_remquo: the symptom is that
the executed block sequence is different. Maybe there are some bugs in buildJIPs
or somewhere else; calling for Zhigang's help.


Luo Xionghu (3):
  Add Gen IR WHILE.
  add handleSelfLoopNode to insert while instruction on Gen IR level.
  Use instruction WHILE to manipulate structure.

 backend/src/backend/gen_context.cpp |   10 +++
 backend/src/backend/gen_encoder.cpp |   13 -
 backend/src/backend/gen_encoder.hpp |2 ++
 backend/src/backend/gen_insn_scheduling.cpp |2 +-
 backend/src/backend/gen_insn_selection.cpp  |   20 +
 backend/src/backend/gen_insn_selection.hxx  |1 +
 backend/src/ir/function.hpp |3 ++
 backend/src/ir/instruction.cpp  |7 -
 backend/src/ir/instruction.hpp  |2 ++
 backend/src/ir/instruction.hxx  |1 +
 backend/src/ir/structural_analysis.cpp  |   42 ---
 backend/src/ir/structural_analysis.hpp  |4 +--
 12 files changed, 91 insertions(+), 16 deletions(-)

-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/3] Add Gen IR WHILE.

2014-09-14 Thread xionghu . luo
From: Luo Xionghu 

Add Gen IR WHILE to mark the structured region.

Signed-off-by: Luo Xionghu 
---
 backend/src/ir/instruction.cpp |7 ++-
 backend/src/ir/instruction.hpp |2 ++
 backend/src/ir/instruction.hxx |1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 370fb87..b8beea1 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -349,7 +349,7 @@ namespace ir {
 {
 public:
   INLINE BranchInstruction(Opcode op, LabelIndex labelIndex, Register 
predicate, bool inv_pred=false) {
-GBE_ASSERT(op == OP_BRA || op == OP_IF);
+GBE_ASSERT(op == OP_BRA || op == OP_IF || op == OP_WHILE);
 this->opcode = op;
 this->predicate = predicate;
 this->labelIndex = labelIndex;
@@ -1634,6 +1634,11 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, 
getImageIndex(void), getImageIndex
 return internal::BranchInstruction(OP_ENDIF, labelIndex).convert();
   }
 
+  // WHILE
+  Instruction WHILE(LabelIndex labelIndex, Register pred) {
+return internal::BranchInstruction(OP_WHILE, labelIndex, pred).convert();
+  }
+
   // RET
   Instruction RET(void) {
 return internal::BranchInstruction(OP_RET).convert();
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 39fb2db..afaedff 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -670,6 +670,8 @@ namespace ir {
   Instruction ELSE(LabelIndex labelIndex);
   /*! endif */
   Instruction ENDIF(LabelIndex labelIndex);
+  /*! (pred) while labelIndex */
+  Instruction WHILE(LabelIndex labelIndex, Register pred);
   /*! ret */
   Instruction RET(void);
   /*! load.type.space {dst1,...,dst_valueNum} offset value */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index abc984f..f86d0e1 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -96,3 +96,4 @@ DECL_INSN(MAD, TernaryInstruction)
 DECL_INSN(IF, BranchInstruction)
 DECL_INSN(ENDIF, BranchInstruction)
 DECL_INSN(ELSE, BranchInstruction)
+DECL_INSN(WHILE, BranchInstruction)
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/3] add handleSelfLoopNode to insert while instruction on Gen IR level.

2014-09-14 Thread xionghu . luo
From: Luo Xionghu 

Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen_encoder.cpp|2 +-
 backend/src/ir/function.hpp|3 +++
 backend/src/ir/structural_analysis.cpp |   40 
 backend/src/ir/structural_analysis.hpp |4 ++--
 4 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/backend/src/backend/gen_encoder.cpp 
b/backend/src/backend/gen_encoder.cpp
index 26e997d..c67e85e 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1044,7 +1044,7 @@ namespace gbe
  this->setSrc1(&insn, GenRegister::immd(jumpDistance));
  return;
}
-   else if (insn.header.opcode == GEN_OPCODE_JMPI) {
+   else if (insn.header.opcode == GEN_OPCODE_JMPI){
  jumpDistance = jumpDistance - 2;
}
else if(insn.header.opcode == GEN_OPCODE_ENDIF)
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index c5582b4..fc5ad45 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -142,6 +142,9 @@ namespace ir {
  * else node into all the basic blocks belong to 'then' part while the 
liveout is
  * calculated in structural_analysis.cpp:calculateNecessaryLiveout(); */
 std::set liveout;
+/* selfLoop's label.
+ * */
+LabelIndex whileLabel;
   private:
 friend class Function; //!< Owns the basic blocks
 BlockSet predecessors; //!< Incoming blocks
diff --git a/backend/src/ir/structural_analysis.cpp 
b/backend/src/ir/structural_analysis.cpp
index 459a61e..1c4bf40 100644
--- a/backend/src/ir/structural_analysis.cpp
+++ b/backend/src/ir/structural_analysis.cpp
@@ -57,6 +57,23 @@ namespace analysis
   iter++;
 }
   }
+  void ControlTree::handleSelfLoopNode(Node *loopnode, ir::LabelIndex& 
whileLabel)
+  {
+ir::BasicBlock *pbb = loopnode->getExit();
+ir::BranchInstruction* pinsn = static_cast<ir::BranchInstruction*>(pbb->getLastInstruction());
+ir::Register reg = pinsn->getPredicateIndex();
+ir::BasicBlock::iterator it = pbb->end();
+it--;
+/* since this node is an while node, so we remove the BRA instruction at 
the bottom of the exit BB of 'node',
+ * and insert WHILE instead
+ */
+pbb->erase(it);
+whileLabel = pinsn->getLabelIndex();
+ir::Instruction insn = ir::WHILE(whileLabel, reg);
+ir::Instruction* p_new_insn = pbb->getParent().newInstruction(insn);
+pbb->append(*p_new_insn);
+pbb->whileLabel = whileLabel;
+  }
 
   /* recursive mark the bbs' variable needEndif, the bbs all belong to node.*/
   void ControlTree::markNeedIf(Node *node, bool status)
@@ -207,7 +224,7 @@ namespace analysis
  * structures */
 while(rit != nodes.rend())
 {
-  if((*rit)->type() == IfThen || (*rit)->type() == IfElse)
+  if((*rit)->type() == IfThen || (*rit)->type() == IfElse|| (*rit)->type() 
== SelfLoop)
   {
 if(false == (*rit)->mark && (*rit)->canBeHandled)
 {
@@ -229,7 +246,7 @@ namespace analysis
   }
   else if((*rit)->type() == SelfLoop || (*rit)->type() == WhileLoop)
   {
-printf("process loop\n");
+
   }
   rit++;
 }
@@ -260,12 +277,12 @@ namespace analysis
  */
 while(rit != nodes.rend())
 {
-  if(((*rit)->type() == IfThen || (*rit)->type() == IfElse || 
(*rit)->type() == Block) &&
+  if(((*rit)->type() == IfThen || (*rit)->type() == IfElse || 
(*rit)->type() == Block ||(*rit)->type() == SelfLoop) &&
   (*rit)->canBeHandled && (*rit)->mark == true)
   {
 markStructuredNodes(*rit, false);
 std::set ns = getStructureBasicBlocksIndex(*rit, bbs);
-ir::BasicBlock *entry = (*it)->getEntry();
+ir::BasicBlock *entry = (*rit)->getEntry();
 
 int entryIndex = *(ns.begin());
 for(size_t i=0; ichildren.begin();
+  ir::LabelIndex whilelabel;
+  handleSelfLoopNode(*child_iter, whilelabel);
+}
+break;
+
   default:
 break;
 }
@@ -841,7 +866,6 @@ namespace analysis
* ignore the identification of cyclic regions. */
   Node * ControlTree::cyclicRegionType(Node *node, NodeList &nset)
   {
-#if 0
 /* check for self-loop */
 if(nset.size() == 1)
 {
@@ -874,7 +898,6 @@ namespace analysis
   if(node->succs().size() == 2 && (*m)->succs().size() == 1 &&
  node->preds().size() == 2 && (*m)->preds().size() == 1)
   {
-printf("WhileLoop!\n\n");
 Node* p = new WhileLoopNode(node, *m);
 
 p->canBeHandled = false;
@@ -882,7 +905,6 @@ namespace analysis
 return insertNode(p);
   }
 }
-#endif
 return NULL;
   }
 
@@ -1008,7 +1030,6 @@ namespace analysis
 else
 {
 /* We now only deal with acyclic regions at this moment. */
-#if 0
   reachUnder.clear();
   nset.clear();
   for(NodeList::const_iterator m = post_order.begin(); m != 
post_order.

[Beignet] [PATCH 3/3] Use instruction WHILE to manipulate structure.

2014-09-14 Thread xionghu . luo
From: Luo Xionghu 

1. The WHILE instruction should be non-schedulable.
2. If this WHILE instruction jumps to an ELSE instruction, the jump distance
needs to be increased by 2.

Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen_context.cpp |   10 ++
 backend/src/backend/gen_encoder.cpp |   11 +++
 backend/src/backend/gen_encoder.hpp |2 ++
 backend/src/backend/gen_insn_scheduling.cpp |2 +-
 backend/src/backend/gen_insn_selection.cpp  |   20 
 backend/src/backend/gen_insn_selection.hxx  |1 +
 backend/src/ir/structural_analysis.cpp  |2 +-
 7 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index ba4a8f8..6cbfa43 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -254,6 +254,16 @@ namespace gbe
   p->ELSE(src);
 }
 break;
+  case SEL_OP_WHILE:
+{
+  /*const ir::LabelIndex label0(insn.index), label1(insn.index1);
+  const LabelPair labelPair(label0, label1);
+  const GenRegister src = ra->genReg(insn.src(0));
+  this->branchPos3.push_back(std::make_pair(labelPair, 
p->store.size()));*/
+  insertJumpPos(insn);
+  p->WHILE(src);
+}
+break;
   default: NOT_IMPLEMENTED;
 }
   }
diff --git a/backend/src/backend/gen_encoder.cpp 
b/backend/src/backend/gen_encoder.cpp
index c67e85e..295e11d 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1026,6 +1026,7 @@ namespace gbe
   ALU2_BRA(IF)
   ALU2_BRA(ELSE)
   ALU2_BRA(ENDIF)
+  ALU2_BRA(WHILE)
   ALU2_BRA(BRD)
   ALU2_BRA(BRC)
 
@@ -1037,8 +1038,18 @@ namespace gbe
insn.header.opcode == GEN_OPCODE_ENDIF ||
insn.header.opcode == GEN_OPCODE_IF ||
insn.header.opcode == GEN_OPCODE_BRC ||
+   insn.header.opcode == GEN_OPCODE_WHILE ||
insn.header.opcode == GEN_OPCODE_ELSE);
 
+if( insn.header.opcode == GEN_OPCODE_WHILE ){
+  // if this WHILE instruction jump back to an ELSE instruction,
+  // need add distance to go to the next instruction.
+  GenNativeInstruction & insn_else = *(GenNativeInstruction 
*)&this->store[insnID+jumpDistance];
+  if(insn_else.header.opcode == GEN_OPCODE_ELSE){
+jumpDistance += 2;
+  }
+}
+
 if (insn.header.opcode != GEN_OPCODE_JMPI || (jumpDistance > -32769 && 
jumpDistance < 32768))  {
if (insn.header.opcode == GEN_OPCODE_IF) {
  this->setSrc1(&insn, GenRegister::immd(jumpDistance));
diff --git a/backend/src/backend/gen_encoder.hpp 
b/backend/src/backend/gen_encoder.hpp
index 9844eb8..2c999ce 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -154,6 +154,8 @@ namespace gbe
 void ELSE(GenRegister src);
 /*! ENDIF indexed instruction */
 void ENDIF(GenRegister src);
+/*! WHILE indexed instruction */
+void WHILE(GenRegister src);
 /*! BRC indexed instruction */
 void BRC(GenRegister src);
 /*! BRD indexed instruction */
diff --git a/backend/src/backend/gen_insn_scheduling.cpp 
b/backend/src/backend/gen_insn_scheduling.cpp
index 4324206..035a021 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -590,7 +590,7 @@ namespace gbe
 for (int32_t insnID = 0; insnID < insnNum; ++insnID) {
   ScheduleDAGNode *node = tracker.insnNodes[insnID];
   if (node->insn.isBranch() || node->insn.isLabel()
-  || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF
+  || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF 
|| node->insn.opcode == SEL_OP_WHILE
   || node->insn.opcode == SEL_OP_BARRIER)
 tracker.makeBarrier(insnID, insnNum);
 }
diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 170a9d8..4509072 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -543,6 +543,8 @@ namespace gbe
 void ELSE(Reg src, ir::LabelIndex jip, ir::LabelIndex elseLabel);
 /*! ENDIF indexed instruction */
 void ENDIF(Reg src, ir::LabelIndex jip, ir::LabelIndex endifLabel = 
ir::LabelIndex(0));
+/*! WHILE indexed instruction */
+void WHILE(Reg src, ir::LabelIndex jip);
 /*! BRD indexed instruction */
 void BRD(Reg src, ir::LabelIndex jip);
 /*! BRC indexed instruction */
@@ -1062,6 +1064,12 @@ namespace gbe
 insn->index = uint16_t(this->block->endifLabel);
   }
 
+  void Selection::Opaque::WHILE(Reg src, ir::LabelIndex jip) {
+SelectionInstruction *insn = this->appendInsn(SEL_OP_WHILE, 0, 1);
+insn->src(0) = src;
+insn->index = uint16_t(jip);
+  }
+
   void Selection::Opaque::CMP(uint32_t conditional, Reg src0, Reg src1, Reg 
dst) {
 SelectionInst

[Beignet] [PATCH] remove the LinkOnceAnyLinkage since the libocl is introduced.

2014-09-15 Thread xionghu . luo
From: Luo Xionghu 

There is no longer any need to set LinkOnceAnyLinkage on global variables and
functions to avoid redefinition.

Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen_program.cpp |9 -
 1 file changed, 9 deletions(-)

diff --git a/backend/src/backend/gen_program.cpp 
b/backend/src/backend/gen_program.cpp
index 44c9c10..bd0c070 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -367,15 +367,6 @@ namespace gbe {
 }else{
   //set the global variables and functions to link once to fix redefine.
   llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module;
-  for (llvm::Module::global_iterator I = src->global_begin(), E = 
src->global_end(); I != E; ++I) {
-I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
-  }
-
-  for (llvm::Module::iterator I = src->begin(), E = src->end(); I != E; 
++I) {
-llvm::Function *F = llvm::dyn_cast<llvm::Function>(I);
-if (F && isKernelFunction(*F)) continue;
-I->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
-  }
   llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module;
   llvm::Linker::LinkModules( dst,
  src,
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] improve the build performance of vector type built-in function.

2014-09-16 Thread xionghu . luo
From: Luo Xionghu 

this patch was lost during the libocl merge. resubmit it to improve the
vector function performance.

please refer to e2db890596eea0a6eb741e11e576a38952f1ed1e for detail.

Signed-off-by: Luo Xionghu 
---
 backend/src/libocl/script/gen_vector.py |   45 ++-
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/backend/src/libocl/script/gen_vector.py 
b/backend/src/libocl/script/gen_vector.py
index a91dfcf..de28552 100755
--- a/backend/src/libocl/script/gen_vector.py
+++ b/backend/src/libocl/script/gen_vector.py
@@ -289,9 +289,42 @@ class builtinProto():
 formatStr += ';'
 self.append(formatStr)
 return formatStr
-formatStr = self.append(formatStr, '{{return 
({0}{1})('.format(vtype[0], vtype[1]))
-self.indent = len(formatStr)
-for j in range(0, vtype[1]):
+if self.functionName != 'select' and ptypeSeqs[0] == 
ptypeSeqs[self.paramCount-1] and ptype[1] > 4:
+formatStr += '\n{ \n  union{'
+formatStr = self.append(formatStr, '{0} 
va[{1}];'.format(vtype[0], vtype[1]))
+formatStr = self.append(formatStr, '{0}{1} 
vv{2};'.format(vtype[0], vtype[1], vtype[1]))
+formatStr += '\n  }uret;'
+formatStr += '\n  union{'
+formatStr = self.append(formatStr, '{0} 
pa[{1}];'.format(ptype[0], ptype[1]))
+formatStr = self.append(formatStr, '{0}{1} 
pv{2};'.format(ptype[0], ptype[1], ptype[1]))
+formatStr += '\n  }'
+for n in range(0, self.paramCount):
+  formatStr += 'usrc{0}'.format(n)
+  if n+1 != self.paramCount:
+formatStr +=', '
+formatStr += ';'
+
+for n in range(0, self.paramCount):
+  formatStr = self.append(formatStr, '  usrc{0}.pv{1} = 
param{2};'.format(n, ptype[1], n))
+formatStr = self.append(formatStr, '  for(int i =0; i < {0}; 
i++)'.format(ptype[1]))
+formatStr += '\nuret.va[i] = '
+if self.prefix == 'relational' and self.functionName != 
'bitselect' and self.functionName != 'select':
+  formatStr += '-'
+formatStr += '{0}('.format(self.functionName)
+
+for n in range(0, self.paramCount):
+  formatStr += 'usrc{0}.pa[i]'.format(n)
+  if n+1 != self.paramCount:
+formatStr +=', '
+formatStr += ');'
+formatStr = self.append(formatStr, ' return 
uret.vv{0};'.format(vtype[1]))
+formatStr += '\n}'
+formatStr = self.append(formatStr)
+return formatStr
+else:
+  formatStr = self.append(formatStr, '{{return 
({0}{1})('.format(vtype[0], vtype[1]))
+  self.indent = len(formatStr)
+  for j in range(0, vtype[1]):
 if (j != 0):
 formatStr += ','
 if (j + 1) % 2 == 0:
@@ -326,10 +359,10 @@ class builtinProto():
 
 formatStr += ')'
 
-formatStr += '); }\n'
-self.append(formatStr)
+  formatStr += '); }\n'
+  self.append(formatStr)
 
-return formatStr
+  return formatStr
 
 def output(self):
 for line in self.outputStr:
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] fix switch bug and utest memory leak.

2014-09-16 Thread xionghu . luo
From: Luo Xionghu 

Signed-off-by: Luo Xionghu 
---
 src/cl_device_id.c|1 +
 utests/builtin_kernel_max_global_size.cpp |5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index ee3f2b7..1ce5380 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -607,6 +607,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
 
 return CL_SUCCESS;
   }
+  return CL_SUCCESS;
 default:
   return CL_INVALID_VALUE;
   };
diff --git a/utests/builtin_kernel_max_global_size.cpp 
b/utests/builtin_kernel_max_global_size.cpp
index c777564..e6910cd 100644
--- a/utests/builtin_kernel_max_global_size.cpp
+++ b/utests/builtin_kernel_max_global_size.cpp
@@ -18,12 +18,13 @@ void builtin_kernel_max_global_size(void)
   OCL_ASSERT(builtin_kernel_1d != NULL);
   size_t param_value_size;
   void* param_value;
-  clGetKernelWorkGroupInfo(builtin_kernel_1d, device, 
CL_KERNEL_GLOBAL_WORK_SIZE, 0, NULL, &param_value_size);
+  OCL_CALL(clGetKernelWorkGroupInfo, builtin_kernel_1d, device, 
CL_KERNEL_GLOBAL_WORK_SIZE, 0, NULL, &param_value_size);
   param_value = malloc(param_value_size);
-  clGetKernelWorkGroupInfo(builtin_kernel_1d, device, 
CL_KERNEL_GLOBAL_WORK_SIZE, param_value_size, param_value, 0);
+  OCL_CALL(clGetKernelWorkGroupInfo, builtin_kernel_1d, device, 
CL_KERNEL_GLOBAL_WORK_SIZE, param_value_size, param_value, 0);
   OCL_ASSERT(*(size_t*)param_value == 256 * 1024 *1024);
   clReleaseKernel(builtin_kernel_1d);
   clReleaseProgram(built_in_prog);
+  free(built_in_kernel_names);
   free(param_value);
 }
 
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V2] fix CL_KERNEL_GLOBAL_WORK_SIZE bug.

2014-09-25 Thread xionghu . luo
From: Luo 

According to the spec, the CL_KERNEL_GLOBAL_WORK_SIZE option of
clGetKernelWorkGroupInfo should be queried with a built-in kernel or on a
custom device; this patch uses a built-in kernel to query the GLOBAL_WORK_SIZE.

v2: use a built-in kernel to query the GLOBAL_WORK_SIZE if one exists, and the
dummy kernel for the other options; handle the case when no built-in kernel is
provided.

Signed-off-by: Luo 
---
 tests/cl/api/get-kernel-work-group-info.c |   66 +++--
 1 file changed, 63 insertions(+), 3 deletions(-)

diff --git a/tests/cl/api/get-kernel-work-group-info.c 
b/tests/cl/api/get-kernel-work-group-info.c
index 47d09da..11d29d2 100644
--- a/tests/cl/api/get-kernel-work-group-info.c
+++ b/tests/cl/api/get-kernel-work-group-info.c
@@ -61,6 +61,11 @@ piglit_cl_test(const int argc,
int i;
cl_int errNo;
cl_kernel kernel;
+   cl_program built_in_prog = NULL;
+   cl_kernel built_in_kernel = NULL;
+   cl_kernel temp_kernel;
+   size_t built_in_kernels_size;
+
 
size_t param_value_size;
void* param_value;
@@ -71,19 +76,65 @@ piglit_cl_test(const int argc,
PIGLIT_CL_ENUM_ARRAY(cl_kernel_work_group_info);
 
kernel = clCreateKernel(env->program,
-   "dummy_kernel",
-   &errNo);
+   "dummy_kernel",
+   &errNo);
+
+   errNo = clGetDeviceInfo(env->device_id, CL_DEVICE_BUILT_IN_KERNELS, 0, 
0, &built_in_kernels_size);
if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
fprintf(stderr,
-   "Failed (error code: %s): Create kernel.\n",
+   "Failed (error code: %s): Get Device Info.\n",
piglit_cl_get_error_name(errNo));
return PIGLIT_FAIL;
}
 
+   if(built_in_kernels_size != 0)
+   {
+   char* built_in_kernel_names;
+   char* kernel_name;
+   size_t ret_sz;
+   built_in_kernel_names = (char* )malloc(built_in_kernels_size * 
sizeof(char) );
+
+   errNo = clGetDeviceInfo(env->device_id, 
CL_DEVICE_BUILT_IN_KERNELS, built_in_kernels_size, 
(void*)built_in_kernel_names, &ret_sz);
+   if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
+   fprintf(stderr,
+   "Failed (error code: %s): Get 
Device Info.\n",
+   
piglit_cl_get_error_name(errNo));
+   return PIGLIT_FAIL;
+   }
+
+   built_in_prog = 
clCreateProgramWithBuiltInKernels(env->context->cl_ctx, 1, &env->device_id, 
built_in_kernel_names, &errNo);
+   if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
+   fprintf(stderr,
+   "Failed (error code: %s): 
Create BuiltIn Program.\n",
+   
piglit_cl_get_error_name(errNo));
+   return PIGLIT_FAIL;
+   }
+
+   kernel_name = strtok(built_in_kernel_names, ";");
+
+   built_in_kernel = clCreateKernel(built_in_prog, kernel_name,  
&errNo);
+   if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
+   fprintf(stderr,
+   "Failed (error code: %s): 
Create kernel.\n",
+   
piglit_cl_get_error_name(errNo));
+   return PIGLIT_FAIL;
+   }
+   free(built_in_kernel_names);
+   }
+
/*** Normal usage ***/
for(i = 0; i < num_kernel_work_group_infos; i++) {
printf("%s ", 
piglit_cl_get_enum_name(kernel_work_group_infos[i]));
 
+   //use builtin kernel to test CL_KERNEL_GLOBAL_WORK_SIZE.
swap the dummy kernel and builtin_kernel.
+   if(kernel_work_group_infos[i] == CL_KERNEL_GLOBAL_WORK_SIZE){
+   if(built_in_kernel != NULL) {
+   temp_kernel = kernel;
+   kernel = built_in_kernel;
+   built_in_kernel = temp_kernel;
+   }
+   }
+
errNo = clGetKernelWorkGroupInfo(kernel,
 env->device_id,
 kernel_work_group_infos[i],
@@ -114,6 +165,13 @@ piglit_cl_test(const int argc,
piglit_merge_result(&result, PIGLIT_FAIL);
}
 
+   if(kernel_work_group_infos[i] == CL_KERNEL_GLOBAL_WORK_SIZE){
+   if(built_in_kernel != NULL) {
+   temp_kernel = kernel;
+   kernel = built_in_kernel;
+   built_in_kernel = temp_k

[Beignet] [PATCH] fix CL_KERNEL_GLOBAL_WORK_SIZE bug.

2014-09-27 Thread xionghu . luo
From: Luo 

According to the spec, the CL_KERNEL_GLOBAL_WORK_SIZE option of
clGetKernelWorkGroupInfo should be queried with a built-in kernel or on a
custom device; this patch uses a built-in kernel to query the GLOBAL_WORK_SIZE.

v2: use a built-in kernel to query the GLOBAL_WORK_SIZE if one exists, and the
dummy kernel for the other options; handle the case when no built-in kernel is
provided.

v3: fix indent issue; move the CL_KERNEL_GLOBAL_WORK_SIZE query out of the
loop, and test it on platforms that support OpenCL 1.2.

Signed-off-by: Luo 
---
 tests/cl/api/get-kernel-work-group-info.c |  127 +
 1 file changed, 127 insertions(+)

diff --git a/tests/cl/api/get-kernel-work-group-info.c 
b/tests/cl/api/get-kernel-work-group-info.c
index 47d09da..f3fd6e5 100644
--- a/tests/cl/api/get-kernel-work-group-info.c
+++ b/tests/cl/api/get-kernel-work-group-info.c
@@ -61,6 +61,11 @@ piglit_cl_test(const int argc,
int i;
cl_int errNo;
cl_kernel kernel;
+#ifdef CL_VERSION_1_2
+   cl_program built_in_prog = NULL;
+   cl_kernel built_in_kernel = NULL;
+   size_t built_in_kernels_size;
+#endif
 
size_t param_value_size;
void* param_value;
@@ -84,6 +89,17 @@ piglit_cl_test(const int argc,
for(i = 0; i < num_kernel_work_group_infos; i++) {
printf("%s ", 
piglit_cl_get_enum_name(kernel_work_group_infos[i]));
 
+#ifdef CL_VERSION_1_2
+   if(kernel_work_group_infos[i] == CL_KERNEL_GLOBAL_WORK_SIZE){
+   if(env->version >= 12) {
+   continue;
+   }else{
+   fprintf(stderr, "Could not query 
CL_KERNEL_GLOBAL_WORK_SIZE. Piglit was compiled against OpenCL version >= 1.2 
and cannot run this test for versions < 1.2 because CL_KERNEL_GLOBAL_WORK_SIZE 
option is not present.\n");
+   piglit_merge_result(&result, PIGLIT_FAIL);
+   }
+   }
+#endif
+
errNo = clGetKernelWorkGroupInfo(kernel,
 env->device_id,
 kernel_work_group_infos[i],
@@ -187,6 +203,117 @@ piglit_cl_test(const int argc,
piglit_merge_result(&result, PIGLIT_FAIL);
}
 
+#ifdef CL_VERSION_1_2
+   if(env->version < 12){
+   fprintf(stderr, "Could not query 
CL_KERNEL_GLOBAL_WORK_SIZE. Piglit was compiled against OpenCL version >= 1.2 
and cannot run this test for versions < 1.2 because CL_KERNEL_GLOBAL_WORK_SIZE 
option is not present.\n");
+   piglit_merge_result(&result, PIGLIT_FAIL);
+   }
+
+   //use builtin kernel to test CL_KERNEL_GLOBAL_WORK_SIZE.
+   errNo = clGetDeviceInfo(env->device_id, CL_DEVICE_BUILT_IN_KERNELS, 0, 
0, &built_in_kernels_size);
+   if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
+   fprintf(stderr,
+   "Failed (error code: %s): Get Device Info.\n",
+   piglit_cl_get_error_name(errNo));
+   piglit_merge_result(&result, PIGLIT_FAIL);
+   }
+
+   if(built_in_kernels_size != 0)
+   {
+   char* built_in_kernel_names;
+   char* kernel_name;
+   size_t ret_sz;
+   built_in_kernel_names = (char* )malloc(built_in_kernels_size * 
sizeof(char) );
+
+   errNo = clGetDeviceInfo(env->device_id, 
CL_DEVICE_BUILT_IN_KERNELS, built_in_kernels_size, 
(void*)built_in_kernel_names, &ret_sz);
+   if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
+   fprintf(stderr,
+   "Failed (error code: %s): Get Device Info.\n",
+   piglit_cl_get_error_name(errNo));
+   piglit_merge_result(&result, PIGLIT_FAIL);
+   }
+
+   built_in_prog = 
clCreateProgramWithBuiltInKernels(env->context->cl_ctx, 1, &env->device_id, 
built_in_kernel_names, &errNo);
+   if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
+   fprintf(stderr,
+   "Failed (error code: %s): Create BuiltIn Program.\n",
+   piglit_cl_get_error_name(errNo));
+   piglit_merge_result(&result, PIGLIT_FAIL);
+   }
+
+   kernel_name = strtok(built_in_kernel_names, ";");
+
+   built_in_kernel = clCreateKernel(built_in_prog, kernel_name,  
&errNo);
+   if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
+   fprintf(stderr,
+   "Failed (error code: %s): Create kernel.\n",
+   piglit_cl_get_error_name(errNo));
+   piglit_merge_result(&result, PIGLIT_FAIL);
+   }
+   free(built_in_kernel_names);
+   /*
+* CL_INVALID_VALUE if kernel is not a built in kernel.
+*/
+   errNo = clGetKernelWorkGroupInfo(kernel,
+   

[Beignet] [PATCH] use global flag 0.0 to control unstructured simple block.

2014-09-29 Thread xionghu . luo
From: Luo Xionghu 

Filter out the simple blocks and control them with the global flag 0.0
instead of an if/endif pair.

Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen_insn_selection.cpp |   50 
 backend/src/backend/gen_insn_selection.hpp |1 +
 backend/src/backend/gen_reg_allocation.cpp |3 +-
 3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index f284ae1..e3547c6 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -217,7 +217,7 @@ namespace gbe
   // SelectionBlock
   ///
 
-  SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb), 
isLargeBlock(false), endifLabel( (ir::LabelIndex) 0){}
+  SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb), 
isLargeBlock(false), endifLabel( (ir::LabelIndex) 0), 
removeSimpleIfEndif(false){}
 
   void SelectionBlock::append(ir::Register reg) { tmp.push_back(reg); }
 
@@ -403,6 +403,8 @@ namespace gbe
 uint32_t buildBasicBlockDAG(const ir::BasicBlock &bb);
 /*! Perform the selection on the basic block */
 void matchBasicBlock(const ir::BasicBlock &bb, uint32_t insnNum);
+/*! a simple block can use predication instead of if/endif*/
+bool isSimpleBlock(const ir::BasicBlock &bb, uint32_t insnNum);
 /*! A root instruction needs to be generated */
 bool isRoot(const ir::Instruction &insn) const;
 
@@ -1471,6 +1473,26 @@ namespace gbe
 return false;
   }
 
+  bool Selection::Opaque::isSimpleBlock(const ir::BasicBlock &bb, uint32_t 
insnNum) {
+for (int32_t insnID = insnNum-1; insnID >= 0; --insnID) {
+  SelectionDAG &dag = *insnDAG[insnID];
+  const ir::Instruction& insn = dag.insn;
+  if(insn.isMemberOf() ||
+ insn.isMemberOf() ||
+ insn.getOpcode() == ir::OP_SIMD_ANY ||
+ insn.getOpcode() == ir::OP_SIMD_ALL ||
+ insn.getOpcode() == ir::OP_ELSE)
+return false;
+}
+
+if(!(insnDAG[insnNum-1]->insn.isMemberOf()) ||
+insnDAG[insnNum-1]->insn.getOpcode() == ir::OP_ENDIF)
+  return false;
+
+return true;
+  }
+
+
   uint32_t Selection::Opaque::buildBasicBlockDAG(const ir::BasicBlock &bb)
   {
 using namespace ir;
@@ -1551,7 +1573,9 @@ namespace gbe
 // Bottom up code generation
 bool needEndif = this->block->hasBranch == false && 
!this->block->hasBarrier;
 needEndif = needEndif && bb.needEndif;
-if (needEndif) {
+this->block->removeSimpleIfEndif = insnNum < 5 && isSimpleBlock(bb, 
insnNum);
+//this->block->removeSimpleIfEndif = 
false;//this->block->removeSimpleIfEndif && needEndif;
+if (needEndif && !this->block->removeSimpleIfEndif) {
   if(!bb.needIf) // this basic block is the exit of a structure
 this->ENDIF(GenRegister::immd(0), bb.endifLabel, bb.endifLabel);
   else {
@@ -1572,6 +1596,12 @@ namespace gbe
 
 // Start a new code fragment
 this->startBackwardGeneration();
+
+if(this->block->removeSimpleIfEndif){
+  this->curr.predicate = GEN_PREDICATE_NORMAL;
+  this->curr.flag = 0;
+  this->curr.subFlag = 0;
+}
 // If there is no branch at the end of this block.
 
 // Try all the patterns from best to worst
@@ -1581,6 +1611,12 @@ namespace gbe
   ++it;
 } while (it != end);
 GBE_ASSERT(it != end);
+
+if(this->block->removeSimpleIfEndif){
+  this->curr.predicate = GEN_PREDICATE_NONE;
+  this->curr.flag = 0;
+  this->curr.subFlag = 0;
+}
 // If we are in if/endif fix mode, and this block is
 // large enough, we need to insert endif/if pair to eliminate
 // the too long if/endif block.
@@ -3808,7 +3844,8 @@ namespace gbe
 sel.JMPI(GenRegister::immd(0), jip, label);
   sel.pop();
 }
-sel.push();
+if(!sel.block->removeSimpleIfEndif){
+  sel.push();
   sel.curr.predicate = GEN_PREDICATE_NORMAL;
   if(!insn.getParent()->needEndif && insn.getParent()->needIf) {
 ir::LabelIndex label = insn.getParent()->endifLabel;
@@ -3816,7 +3853,8 @@ namespace gbe
   }
   else
 sel.IF(GenRegister::immd(0), sel.block->endifLabel, 
sel.block->endifLabel);
-sel.pop();
+  sel.pop();
+}
   }
 
   return true;
@@ -4077,7 +4115,7 @@ namespace gbe
   sel.curr.predicate = GEN_PREDICATE_NORMAL;
   sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
   sel.curr.predicate = GEN_PREDICATE_NONE;
-  if (!sel.block->hasBarrier)
+  if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
 sel.ENDIF(GenRegister::immd(0), nextLabel);
   sel.block->endifOffset = -1;
 sel.pop();
@@ -4087,7 +4125,7 @@ namespace gbe
   

[Beignet] [PATCH 2/2] add utest popcount_int and popcount_short.

2014-10-10 Thread xionghu . luo
From: Luo Xionghu 

Signed-off-by: Luo Xionghu 
---
 kernels/compiler_popcount_int.cl   |4 
 kernels/compiler_popcount_short.cl |4 
 utests/CMakeLists.txt  |2 ++
 utests/compiler_popcount_int.cpp   |   32 
 utests/compiler_popcount_short.cpp |   32 
 5 files changed, 74 insertions(+)
 create mode 100644 kernels/compiler_popcount_int.cl
 create mode 100644 kernels/compiler_popcount_short.cl
 create mode 100644 utests/compiler_popcount_int.cpp
 create mode 100644 utests/compiler_popcount_short.cpp

diff --git a/kernels/compiler_popcount_int.cl b/kernels/compiler_popcount_int.cl
new file mode 100644
index 000..b972dbc
--- /dev/null
+++ b/kernels/compiler_popcount_int.cl
@@ -0,0 +1,4 @@
+kernel void compiler_popcount_int(global int *src, global int *dst) {
+  int i = get_global_id(0);
+  dst[i] = popcount(src[i]);
+}
diff --git a/kernels/compiler_popcount_short.cl 
b/kernels/compiler_popcount_short.cl
new file mode 100644
index 000..e4204c5
--- /dev/null
+++ b/kernels/compiler_popcount_short.cl
@@ -0,0 +1,4 @@
+kernel void compiler_popcount_short(global short *src, global short *dst) {
+  int i = get_global_id(0);
+  dst[i] = popcount(src[i]);
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index b45ecf9..2fe6243 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -41,6 +41,8 @@ set (utests_sources
   compiler_ceil.cpp
   compiler_clz_short.cpp
   compiler_clz_int.cpp
+  compiler_popcount_short.cpp
+  compiler_popcount_int.cpp
   compiler_convert_uchar_sat.cpp
   compiler_copy_buffer.cpp
   compiler_copy_image.cpp
diff --git a/utests/compiler_popcount_int.cpp b/utests/compiler_popcount_int.cpp
new file mode 100644
index 000..a3f675e
--- /dev/null
+++ b/utests/compiler_popcount_int.cpp
@@ -0,0 +1,32 @@
+#include "utest_helper.hpp"
+
+void compiler_popcount_int(void)
+{
+  const int n = 32;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_popcount_int");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  OCL_MAP_BUFFER(0);
+  ((int*)buf_data[0])[0] = 0;
+  for (int32_t i = 1; i < (int32_t) n; ++i)
+((int*)buf_data[0])[i] = 0xffffffffu >> i;
+  OCL_UNMAP_BUFFER(0);
+
+  OCL_NDRANGE(1);
+
+  OCL_MAP_BUFFER(1);
+  OCL_ASSERT(((int*)buf_data[1])[0] == 0);
+  for (int i = 1; i < n; ++i){
+OCL_ASSERT(((int*)buf_data[1])[i] == n-i);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_popcount_int);
diff --git a/utests/compiler_popcount_short.cpp 
b/utests/compiler_popcount_short.cpp
new file mode 100644
index 000..7aa1ebf
--- /dev/null
+++ b/utests/compiler_popcount_short.cpp
@@ -0,0 +1,32 @@
+#include "utest_helper.hpp"
+
+void compiler_popcount_short(void)
+{
+  const int n = 16;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_popcount_short");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(short), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  OCL_MAP_BUFFER(0);
+  ((short*)buf_data[0])[0] = 0;
+  for (int32_t i = 1; i < (int32_t) n; ++i)
+((short*)buf_data[0])[i] = 0xffffu >> i;
+  OCL_UNMAP_BUFFER(0);
+
+  OCL_NDRANGE(1);
+
+  OCL_MAP_BUFFER(1);
+  OCL_ASSERT(((short*)buf_data[1])[0] == 0);
+  for (int i = 1; i < n; ++i){
+OCL_ASSERT(((short*)buf_data[1])[i] == short(n-i) );
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_popcount_short);
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/2] add opencl-1.2 builtin function popcount.

2014-10-10 Thread xionghu . luo
From: Luo 

The popcount function returns the number of non-zero bits in its input.
Use the GEN instruction cbit (Count Bits Set) to implement it.

Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen/gen_mesa_disasm.c   |1 +
 backend/src/backend/gen_context.cpp |1 +
 backend/src/backend/gen_defs.hpp|1 +
 backend/src/backend/gen_encoder.cpp |1 +
 backend/src/backend/gen_encoder.hpp |1 +
 backend/src/backend/gen_insn_selection.cpp  |4 +++-
 backend/src/backend/gen_insn_selection.hxx  |1 +
 backend/src/ir/instruction.cpp  |1 +
 backend/src/ir/instruction.hpp  |2 ++
 backend/src/ir/instruction.hxx  |1 +
 backend/src/libocl/script/ocl_integer.def   |3 +--
 backend/src/libocl/tmpl/ocl_integer.tmpl.cl |   30 +++
 backend/src/libocl/tmpl/ocl_integer.tmpl.h  |9 
 backend/src/llvm/llvm_gen_backend.cpp   |2 ++
 backend/src/llvm/llvm_gen_ocl_function.hxx  |1 +
 15 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c 
b/backend/src/backend/gen/gen_mesa_disasm.c
index 266b501..330dffb 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -66,6 +66,7 @@ static const struct {
   [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
   [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
   [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
+  [GEN_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 },
   [GEN_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
   [GEN_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
 
diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 8844233..c37d2ee 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -205,6 +205,7 @@ namespace gbe
   case SEL_OP_READ_ARF: p->MOV(dst, src); break;
   case SEL_OP_FBH: p->FBH(dst, src); break;
   case SEL_OP_FBL: p->FBL(dst, src); break;
+  case SEL_OP_CBIT: p->CBIT(dst, src); break;
   case SEL_OP_NOT: p->NOT(dst, src); break;
   case SEL_OP_RNDD: p->RNDD(dst, src); break;
   case SEL_OP_RNDU: p->RNDU(dst, src); break;
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 19aad95..3faacde 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -159,6 +159,7 @@ enum opcode {
   GEN_OPCODE_LZD = 74,
   GEN_OPCODE_FBH = 75,
   GEN_OPCODE_FBL = 76,
+  GEN_OPCODE_CBIT = 77,
   GEN_OPCODE_ADDC = 78,
   GEN_OPCODE_SUBB = 79,
   GEN_OPCODE_SAD2 = 80,
diff --git a/backend/src/backend/gen_encoder.cpp 
b/backend/src/backend/gen_encoder.cpp
index 295e11d..bd6204a 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -901,6 +901,7 @@ namespace gbe
   ALU1(RNDU)
   ALU1(FBH)
   ALU1(FBL)
+  ALU1(CBIT)
   ALU1(F16TO32)
   ALU1(F32TO16)
   ALU2(SEL)
diff --git a/backend/src/backend/gen_encoder.hpp 
b/backend/src/backend/gen_encoder.hpp
index 2c999ce..3f486d7 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -101,6 +101,7 @@ namespace gbe
 ALU1(MOV)
 ALU1(FBH)
 ALU1(FBL)
+ALU1(CBIT)
 ALU2(SUBB)
 ALU2(UPSAMPLE_SHORT)
 ALU2(UPSAMPLE_INT)
diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index e3ee35d..e05a0a6 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -493,6 +493,7 @@ namespace gbe
 ALU2WithTemp(MUL_HI)
 ALU1(FBH)
 ALU1(FBL)
+ALU1(CBIT)
 ALU2WithTemp(HADD)
 ALU2WithTemp(RHADD)
 ALU2(UPSAMPLE_SHORT)
@@ -1861,7 +1862,7 @@ namespace gbe
 static ir::Type getType(const ir::Opcode opcode, const ir::Type insnType) {
   if (insnType == ir::TYPE_S64 || insnType == ir::TYPE_U64 || insnType == 
ir::TYPE_S8 || insnType == ir::TYPE_U8)
 return insnType;
-  if (opcode == ir::OP_FBH || opcode == ir::OP_FBL)
+  if (opcode == ir::OP_FBH || opcode == ir::OP_FBL || opcode == 
ir::OP_CBIT)
 return ir::TYPE_U32;
   if (insnType == ir::TYPE_S16 || insnType == ir::TYPE_U16)
 return insnType;
@@ -1915,6 +1916,7 @@ namespace gbe
   case ir::OP_RNDZ: sel.RNDZ(dst, src); break;
   case ir::OP_FBH: sel.FBH(dst, src); break;
   case ir::OP_FBL: sel.FBL(dst, src); break;
+  case ir::OP_CBIT: sel.CBIT(dst, src); break;
   case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
   case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
   case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
diff --git a/backend/src/backend/gen_insn_selection.hxx 
b/backend/src/backend/gen_insn_selection.hxx
index 7511b84..d80dc58 100644
--- a/backend/src/b

[Beignet] [PATCH v2 2/2] add utest popcount for all types.

2014-10-12 Thread xionghu . luo
From: Luo Xionghu 

v2: add all types to test.

Signed-off-by: Luo Xionghu 
---
 kernels/compiler_popcount.cl |   16 +
 utests/CMakeLists.txt|1 +
 utests/compiler_popcount.cpp |   75 ++
 3 files changed, 92 insertions(+)
 create mode 100644 kernels/compiler_popcount.cl
 create mode 100644 utests/compiler_popcount.cpp

diff --git a/kernels/compiler_popcount.cl b/kernels/compiler_popcount.cl
new file mode 100644
index 000..1636118
--- /dev/null
+++ b/kernels/compiler_popcount.cl
@@ -0,0 +1,16 @@
+#define TEST_TYPE(TYPE)   \
+kernel void test_##TYPE(global TYPE *src, global TYPE *dst) { \
+  int i = get_global_id(0);   \
+  dst[i] = popcount(src[i]);  \
+}
+
+TEST_TYPE(char)
+TEST_TYPE(uchar)
+TEST_TYPE(short)
+TEST_TYPE(ushort)
+TEST_TYPE(int)
+TEST_TYPE(uint)
+TEST_TYPE(long)
+TEST_TYPE(ulong)
+
+#undef TEST_TYPE
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index b45ecf9..1b8caca 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -41,6 +41,7 @@ set (utests_sources
   compiler_ceil.cpp
   compiler_clz_short.cpp
   compiler_clz_int.cpp
+  compiler_popcount.cpp
   compiler_convert_uchar_sat.cpp
   compiler_copy_buffer.cpp
   compiler_copy_image.cpp
diff --git a/utests/compiler_popcount.cpp b/utests/compiler_popcount.cpp
new file mode 100644
index 000..0658e1b
--- /dev/null
+++ b/utests/compiler_popcount.cpp
@@ -0,0 +1,75 @@
+#include "utest_helper.hpp"
+
+namespace {
+
+template <typename T>
+T get_max();
+
+#define DEF_TEMPLATE(TYPE, NAME)\
+template <> \
+TYPE get_max<TYPE>()\
+{   \
+  static TYPE max = CL_##NAME##_MAX;\
+  return max;   \
+}   \
+\
+template <> \
+u##TYPE get_max<u##TYPE>()  \
+{   \
+  static u##TYPE max = CL_U##NAME##_MAX;\
+  return max;   \
+}
+
+DEF_TEMPLATE(int8_t, CHAR)
+DEF_TEMPLATE(int16_t, SHRT)
+DEF_TEMPLATE(int32_t, INT)
+DEF_TEMPLATE(int64_t, LONG)
+
+template <typename T>
+void test(const char *kernel_name)
+{
+  const int n = sizeof(T) * 8;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_popcount", kernel_name);
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = n;
+
+  OCL_MAP_BUFFER(0);
+  ((T*)buf_data[0])[0] = 0;
+  for (int32_t i = 1; i < (int32_t) n; ++i){
+((T*)buf_data[0])[i] = get_max<T>() >> i;
+  }
+  OCL_UNMAP_BUFFER(0);
+
+  OCL_NDRANGE(1);
+
+  OCL_MAP_BUFFER(1);
+  OCL_ASSERT(((T*)buf_data[1])[0] == 0);
+  for (int i = 1; i < n; ++i){
+OCL_ASSERT(((T*)buf_data[1])[i] == n-i);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+}
+
+#define compiler_popcount(type, kernel) \
+static void compiler_popcount_ ##type(void)\
+{\
+  test<type>(# kernel);\
+}\
+MAKE_UTEST_FROM_FUNCTION(compiler_popcount_ ## type);
+
+compiler_popcount(int8_t, test_char)
+compiler_popcount(uint8_t, test_uchar)
+compiler_popcount(int16_t, test_short)
+compiler_popcount(uint16_t, test_ushort)
+compiler_popcount(int32_t, test_int)
+compiler_popcount(uint32_t, test_uint)
+compiler_popcount(int64_t, test_long)
+compiler_popcount(uint64_t, test_ulong)
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH v3 2/2] add utest popcount for all types.

2014-10-13 Thread xionghu . luo
From: Luo Xionghu 

v2: add all types to test.
v3: fix the expected bit count for signed types.

Signed-off-by: Luo Xionghu 
---
 kernels/compiler_popcount.cl |   16 +
 utests/CMakeLists.txt|1 +
 utests/compiler_popcount.cpp |   75 ++
 3 files changed, 92 insertions(+)
 create mode 100644 kernels/compiler_popcount.cl
 create mode 100644 utests/compiler_popcount.cpp

diff --git a/kernels/compiler_popcount.cl b/kernels/compiler_popcount.cl
new file mode 100644
index 000..1636118
--- /dev/null
+++ b/kernels/compiler_popcount.cl
@@ -0,0 +1,16 @@
+#define TEST_TYPE(TYPE)   \
+kernel void test_##TYPE(global TYPE *src, global TYPE *dst) { \
+  int i = get_global_id(0);   \
+  dst[i] = popcount(src[i]);  \
+}
+
+TEST_TYPE(char)
+TEST_TYPE(uchar)
+TEST_TYPE(short)
+TEST_TYPE(ushort)
+TEST_TYPE(int)
+TEST_TYPE(uint)
+TEST_TYPE(long)
+TEST_TYPE(ulong)
+
+#undef TEST_TYPE
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index b45ecf9..1b8caca 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -41,6 +41,7 @@ set (utests_sources
   compiler_ceil.cpp
   compiler_clz_short.cpp
   compiler_clz_int.cpp
+  compiler_popcount.cpp
   compiler_convert_uchar_sat.cpp
   compiler_copy_buffer.cpp
   compiler_copy_image.cpp
diff --git a/utests/compiler_popcount.cpp b/utests/compiler_popcount.cpp
new file mode 100644
index 000..c960ae6
--- /dev/null
+++ b/utests/compiler_popcount.cpp
@@ -0,0 +1,75 @@
+#include "utest_helper.hpp"
+
+namespace {
+
+template <typename T>
+T get_max();
+
+#define DEF_TEMPLATE(TYPE, NAME)\
+template <> \
+TYPE get_max<TYPE>()\
+{   \
+  static TYPE max = CL_##NAME##_MAX;\
+  return max;   \
+}   \
+\
+template <> \
+u##TYPE get_max<u##TYPE>()  \
+{   \
+  static u##TYPE max = CL_U##NAME##_MAX;\
+  return max;   \
+}
+
+DEF_TEMPLATE(int8_t, CHAR)
+DEF_TEMPLATE(int16_t, SHRT)
+DEF_TEMPLATE(int32_t, INT)
+DEF_TEMPLATE(int64_t, LONG)
+
+template <typename T>
+void test(const char *kernel_name, int s_type)
+{
+  const int n = sizeof(T) * 8;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_popcount", kernel_name);
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = n;
+
+  OCL_MAP_BUFFER(0);
+  ((T*)buf_data[0])[0] = 0;
+  for (int32_t i = 1; i < (int32_t) n; ++i){
+((T*)buf_data[0])[i] = get_max<T>() >> i;
+  }
+  OCL_UNMAP_BUFFER(0);
+
+  OCL_NDRANGE(1);
+
+  OCL_MAP_BUFFER(1);
+  OCL_ASSERT(((T*)buf_data[1])[0] == 0);
+  for (int i = 1; i < n; ++i){
+OCL_ASSERT(((T*)buf_data[1])[i] == n-i-s_type);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+}
+
+#define compiler_popcount(type, kernel, s_type) \
+static void compiler_popcount_ ##type(void)\
+{\
+  test<type>(# kernel, s_type);\
+}\
+MAKE_UTEST_FROM_FUNCTION(compiler_popcount_ ## type);
+
+compiler_popcount(int8_t, test_char, 1)
+compiler_popcount(uint8_t, test_uchar, 0)
+compiler_popcount(int16_t, test_short, 1)
+compiler_popcount(uint16_t, test_ushort, 0)
+compiler_popcount(int32_t, test_int, 1)
+compiler_popcount(uint32_t, test_uint, 0)
+compiler_popcount(int64_t, test_long, 1)
+compiler_popcount(uint64_t, test_ulong, 0)
-- 
1.7.9.5

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH v2] use global flag 0.0 to control unstructured simple block.

2014-10-15 Thread xionghu . luo
From: Luo Xionghu 

Filter out the simple blocks and replace their if/endif with predication
controlled by the global flag.

v2: fix the luxmark sala performance regression due to extern flag in a
BRA instruction.

Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen_insn_selection.cpp |   80 ++--
 backend/src/backend/gen_insn_selection.hpp |1 +
 backend/src/backend/gen_reg_allocation.cpp |3 +-
 3 files changed, 68 insertions(+), 16 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index b2df76f..f0fd494 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -217,7 +217,7 @@ namespace gbe
   // SelectionBlock
   ///
 
-  SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb), 
isLargeBlock(false), endifLabel( (ir::LabelIndex) 0){}
+  SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb), 
isLargeBlock(false), endifLabel( (ir::LabelIndex) 0), 
removeSimpleIfEndif(false){}
 
   void SelectionBlock::append(ir::Register reg) { tmp.push_back(reg); }
 
@@ -405,6 +405,8 @@ namespace gbe
 uint32_t buildBasicBlockDAG(const ir::BasicBlock &bb);
 /*! Perform the selection on the basic block */
 void matchBasicBlock(const ir::BasicBlock &bb, uint32_t insnNum);
+/*! a simple block can use predication instead of if/endif*/
+bool isSimpleBlock(const ir::BasicBlock &bb, uint32_t insnNum);
 /*! A root instruction needs to be generated */
 bool isRoot(const ir::Instruction &insn) const;
 
@@ -1483,6 +1485,37 @@ namespace gbe
 return false;
   }
 
+  bool Selection::Opaque::isSimpleBlock(const ir::BasicBlock &bb, uint32_t 
insnNum) {
+
+if(bb.belongToStructure)
+  return false;
+
+for (int32_t insnID = insnNum-1; insnID >= 0; --insnID) {
+  SelectionDAG &dag = *insnDAG[insnID];
+  const ir::Instruction& insn = dag.insn;
+  if(insn.isMemberOf() ||
+ insn.isMemberOf() ||
+ insn.getOpcode() == ir::OP_SIMD_ANY ||
+ insn.getOpcode() == ir::OP_SIMD_ALL ||
+ insn.getOpcode() == ir::OP_ELSE)
+return false;
+}
+
+// an extra CMP instruction would be generated for a predicated BRA with 
an extern flag,
+// so return false to keep the if/endif.
+if((insnDAG[insnNum-1]->insn.isMemberOf())){
+  if (insnDAG[insnNum-1]->insn.getOpcode() == ir::OP_BRA) {
+const ir::BranchInstruction &insn = 
ir::cast(insnDAG[insnNum-1]->insn);
+if(insn.isPredicated() && insnDAG[insnNum-1]->child[0] == NULL){
+  return false;
+}
+  }
+}
+
+return true;
+  }
+
+
   uint32_t Selection::Opaque::buildBasicBlockDAG(const ir::BasicBlock &bb)
   {
 using namespace ir;
@@ -1563,7 +1596,8 @@ namespace gbe
 // Bottom up code generation
 bool needEndif = this->block->hasBranch == false && 
!this->block->hasBarrier;
 needEndif = needEndif && bb.needEndif;
-if (needEndif) {
+this->block->removeSimpleIfEndif = insnNum < 10 && isSimpleBlock(bb, 
insnNum);
+if (needEndif && !this->block->removeSimpleIfEndif) {
   if(!bb.needIf) // this basic block is the exit of a structure
 this->ENDIF(GenRegister::immd(0), bb.endifLabel, bb.endifLabel);
   else {
@@ -1584,6 +1618,13 @@ namespace gbe
 
 // Start a new code fragment
 this->startBackwardGeneration();
+
+if(this->block->removeSimpleIfEndif){
+  this->push();
+this->curr.predicate = GEN_PREDICATE_NORMAL;
+this->curr.flag = 0;
+this->curr.subFlag = 0;
+}
 // If there is no branch at the end of this block.
 
 // Try all the patterns from best to worst
@@ -1593,6 +1634,13 @@ namespace gbe
   ++it;
 } while (it != end);
 GBE_ASSERT(it != end);
+
+if(this->block->removeSimpleIfEndif){
+this->curr.predicate = GEN_PREDICATE_NONE;
+this->curr.flag = 0;
+this->curr.subFlag = 0;
+  this->pop();
+}
 // If we are in if/endif fix mode, and this block is
 // large enough, we need to insert endif/if pair to eliminate
 // the too long if/endif block.
@@ -3836,15 +3884,17 @@ namespace gbe
 sel.JMPI(GenRegister::immd(0), jip, label);
   sel.pop();
 }
-sel.push();
-  sel.curr.predicate = GEN_PREDICATE_NORMAL;
-  if(!insn.getParent()->needEndif && insn.getParent()->needIf) {
-ir::LabelIndex label = insn.getParent()->endifLabel;
-sel.IF(GenRegister::immd(0), label, label);
-  }
-  else
-sel.IF(GenRegister::immd(0), sel.block->endifLabel, 
sel.block->endifLabel);
-sel.pop();
+if(!sel.block->removeSimpleIfEndif){
+  sel.push();
+sel.curr.predicate = GEN_PREDICATE_NORMAL;
+if(!insn

  1   2   3   4   5   >