Re: [PATCH 2/2] libdrm_radeon: Optimize cs_gem_reloc to do less looping.

2010-03-11 Thread Michel Dänzer
On Wed, 2010-03-10 at 20:42 +0200, Pauli Nieminen wrote: 
 bo-referenced_in_cs is checked if bo is already in cs. Adding and removing
 reference in bo is done with atomic operations to allow parallel access to a
 bo from multiple contexts.
 
 cs-id generation code quarentees there is not duplicated ids which limits
 number of cs-ids to 32. If there is more cs objects rest will get id 0.
 
 This optimization decreases cs_write_reloc share of torcs profiling from 4.3%
 to 2.6%.
 
 Signed-off-by: Pauli Nieminen suok...@gmail.com

This also needs something like the below for configure to verify that
atomic ops are available.

diff --git a/configure.ac b/configure.ac
index fe00176..e419dd7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -173,7 +173,7 @@ if test x$HAVE_LIBUDEV = xyes; then
 fi
 AM_CONDITIONAL(HAVE_LIBUDEV, [test x$HAVE_LIBUDEV = xyes])
 
-if test x$INTEL != xno; then
+if test x$INTEL != xno || test x$RADEON != xno; then
 # Check for atomic intrinsics
 AC_CACHE_CHECK([for native atomic primitives], drm_cv_atomic_primitives,
 [
@@ -206,13 +206,23 @@ if test x$INTEL != xno; then
 fi
 
 if test x$drm_cv_atomic_primitives = xnone; then
-   if test x$INTEL != xauto; then
+   if test x$INTEL = xyes; then
AC_MSG_ERROR([libdrm_intel depends upon atomic operations, 
which were not found for your compiler/cpu. Try compiling with -march=native, 
or install the libatomics-op-dev package, or, failing both of those, disable 
support for Intel GPUs by passing --disable-intel to ./configure])
   else
INTEL=no
   fi
+   if test x$RADEON = xyes; then
+   AC_MSG_ERROR([libdrm_radeon depends upon atomic operations, 
which were not found for your compiler/cpu. Try compiling with -march=native, 
or install the libatomics-op-dev package, or, failing both of those, disable 
support for Radeon GPUs by passing --disable-radeon to ./configure])
+  else
+   RADEON=no
+  fi
 else
-  INTEL=yes
+   if test x$INTEL != xno; then
+   INTEL=yes
+   fi
+   if test x$RADEON != xno; then
+   RADEON=yes
+   fi
 fi
 fi
 



-- 
Earthling Michel Dänzer   |http://www.vmware.com
Libre software enthusiast |  Debian, X and DRI developer

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 2/2] libdrm_radeon: Optimize cs_gem_reloc to do less looping.

2010-03-10 Thread Pauli Nieminen
bo-referenced_in_cs is checked if bo is already in cs. Adding and removing
reference in bo is done with atomic operations to allow parallel access to a
bo from multiple contexts.

cs-id generation code quarentees there is not duplicated ids which limits
number of cs-ids to 32. If there is more cs objects rest will get id 0.

This optimization decreases cs_write_reloc share of torcs profiling from 4.3%
to 2.6%.

Signed-off-by: Pauli Nieminen suok...@gmail.com
---
 radeon/radeon_bo_gem.c |1 +
 radeon/radeon_bo_int.h |4 +-
 radeon/radeon_cs.c |6 ++
 radeon/radeon_cs.h |2 +-
 radeon/radeon_cs_gem.c |  123 
 radeon/radeon_cs_int.h |1 +
 xf86atomic.h   |6 ++
 7 files changed, 110 insertions(+), 33 deletions(-)

diff --git a/radeon/radeon_bo_gem.c b/radeon/radeon_bo_gem.c
index bc8058d..fb51f4a 100644
--- a/radeon/radeon_bo_gem.c
+++ b/radeon/radeon_bo_gem.c
@@ -80,6 +80,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager 
*bom,
 bo-base.domains = domains;
 bo-base.flags = flags;
 bo-base.ptr = NULL;
+atomic_set(bo-base.referenced_in_cs, 0);
 bo-map_count = 0;
 if (handle) {
 struct drm_gem_open open_arg;
diff --git a/radeon/radeon_bo_int.h b/radeon/radeon_bo_int.h
index 9589ead..9c0ae68 100644
--- a/radeon/radeon_bo_int.h
+++ b/radeon/radeon_bo_int.h
@@ -1,6 +1,8 @@
 #ifndef RADEON_BO_INT
 #define RADEON_BO_INT
 
+#include xf86atomic.h
+
 struct radeon_bo_manager {
 struct radeon_bo_funcs  *funcs;
 int fd;
@@ -17,7 +19,7 @@ struct radeon_bo_int {
 unsignedcref;
 struct radeon_bo_manager*bom;
 uint32_tspace_accounted;
-uint32_treferenced_in_cs;
+atomic_treferenced_in_cs;
 };
 
 /* bo functions */
diff --git a/radeon/radeon_cs.c b/radeon/radeon_cs.c
index cc9be39..d0e922b 100644
--- a/radeon/radeon_cs.c
+++ b/radeon/radeon_cs.c
@@ -88,3 +88,9 @@ void radeon_cs_space_set_flush(struct radeon_cs *cs, void 
(*fn)(void *), void *d
 csi-space_flush_fn = fn;
 csi-space_flush_data = data;
 }
+
+uint32_t radeon_cs_get_id(struct radeon_cs *cs)
+{
+struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+return csi-id;
+}
diff --git a/radeon/radeon_cs.h b/radeon/radeon_cs.h
index 49d5d9a..7f6ee68 100644
--- a/radeon/radeon_cs.h
+++ b/radeon/radeon_cs.h
@@ -85,7 +85,7 @@ extern int radeon_cs_write_reloc(struct radeon_cs *cs,
  uint32_t read_domain,
  uint32_t write_domain,
  uint32_t flags);
-
+extern uint32_t radeon_cs_get_id(struct radeon_cs *cs);
 /*
  * add a persistent BO to the list
  * a persistent BO is one that will be referenced across flushes,
diff --git a/radeon/radeon_cs_gem.c b/radeon/radeon_cs_gem.c
index 45a219c..ef5d3d5 100644
--- a/radeon/radeon_cs_gem.c
+++ b/radeon/radeon_cs_gem.c
@@ -32,6 +32,7 @@
 #include assert.h
 #include errno.h
 #include stdlib.h
+#include pthread.h
 #include sys/mman.h
 #include sys/ioctl.h
 #include radeon_cs.h
@@ -41,6 +42,7 @@
 #include radeon_bo_gem.h
 #include drm.h
 #include xf86drm.h
+#include xf86atomic.h
 #include radeon_drm.h
 
 struct radeon_cs_manager_gem {
@@ -68,6 +70,50 @@ struct cs_gem {
 struct radeon_bo_int**relocs_bo;
 };
 
+static pthread_mutex_t id_mutex = PTHREAD_MUTEX_INITIALIZER;
+static uint32_t cs_id_source = 0;
+
+/**
+ * result is undefined if called with ~0
+ */
+static uint32_t get_first_zero(const uint32_t n)
+{
+/* __builtin_ctz returns number of trailing zeros. */
+return 1  __builtin_ctz(~n);
+}
+
+/**
+ * Returns a free id for cs.
+ * If there is no free id we return zero
+ **/
+static uint32_t generate_id(void)
+{
+uint32_t r = 0;
+pthread_mutex_lock( id_mutex );
+/* check for free ids */
+if (cs_id_source != ~r) {
+/* find first zero bit */
+r = get_first_zero(cs_id_source);
+
+/* set id as reserved */
+cs_id_source |= r;
+}
+pthread_mutex_unlock( id_mutex );
+return r;
+}
+
+/**
+ * Free the id for later reuse
+ **/
+static void free_id(uint32_t id)
+{
+pthread_mutex_lock( id_mutex );
+
+cs_id_source = ~id;
+
+pthread_mutex_unlock( id_mutex );
+}
+
 static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm,
uint32_t ndw)
 {
@@ -90,6 +136,7 @@ static struct radeon_cs_int *cs_gem_create(struct 
radeon_cs_manager *csm,
 }
 csg-base.relocs_total_size = 0;
 csg-base.crelocs = 0;
+csg-base.id = generate_id();
 csg-nrelocs = 4096 / (4 * 4) ;
 csg-relocs_bo = (struct radeon_bo_int**)calloc(1,
 csg-nrelocs*sizeof(void*));
@@ -141,38 +188,45 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs,
 if (write_domain == RADEON_GEM_DOMAIN_CPU) {
 return