A bit hash table stored in the BO is checked first to see whether we can
guarantee that this BO is not already in this cs. When the cs is emitted, the
BO's reference to the cs is removed with bit operations.

To guarantee that no other cs has the same id, the number of CSs that can hold
an id is limited to the number of bits in an unsigned long.

This optimization decreases cs_write_reloc share of torcs profiling from 4.3%
to 2.6%.
---
 radeon/radeon_bo_gem.c |    1 +
 radeon/radeon_bo_int.h |    2 +-
 radeon/radeon_cs_gem.c |  111 ++++++++++++++++++++++++++++++++++-------------
 3 files changed, 82 insertions(+), 32 deletions(-)

diff --git a/radeon/radeon_bo_gem.c b/radeon/radeon_bo_gem.c
index bc8058d..1b33bdb 100644
--- a/radeon/radeon_bo_gem.c
+++ b/radeon/radeon_bo_gem.c
@@ -80,6 +80,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
     bo->base.domains = domains;
     bo->base.flags = flags;
     bo->base.ptr = NULL;
+    bo->base.referenced_in_cs = 0;
     bo->map_count = 0;
     if (handle) {
         struct drm_gem_open open_arg;
diff --git a/radeon/radeon_bo_int.h b/radeon/radeon_bo_int.h
index 9589ead..d1df829 100644
--- a/radeon/radeon_bo_int.h
+++ b/radeon/radeon_bo_int.h
@@ -17,7 +17,7 @@ struct radeon_bo_int {
     unsigned                    cref;
     struct radeon_bo_manager    *bom;
     uint32_t                    space_accounted;
-    uint32_t                    referenced_in_cs;
+    unsigned long               referenced_in_cs;
 };
 
 /* bo functions */
diff --git a/radeon/radeon_cs_gem.c b/radeon/radeon_cs_gem.c
index 45a219c..d23aa35 100644
--- a/radeon/radeon_cs_gem.c
+++ b/radeon/radeon_cs_gem.c
@@ -32,6 +32,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <stdlib.h>
+#include <pthread.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
 #include "radeon_cs.h"
@@ -63,11 +64,49 @@ struct cs_gem {
     struct radeon_cs_int            base;
     struct drm_radeon_cs        cs;
     struct drm_radeon_cs_chunk  chunks[2];
+    unsigned long               id;
     unsigned                    nrelocs;
     uint32_t                    *relocs;
     struct radeon_bo_int        **relocs_bo;
 };
 
+static pthread_mutex_t id_mutex = PTHREAD_MUTEX_INITIALIZER; /* guards cs_id_source */
+static unsigned long cs_id_source = 0; /* bitmask of reserved cs ids; a set bit is in use */
+
+/**
+ * Reserve and return a free cs id: the lowest clear bit of cs_id_source.
+ * Returns zero if every bit is reserved. NOTE(review): confirm callers cope with id 0.
+ **/
+static unsigned long generate_id(void)
+{
+    unsigned long r = 0,x;
+    pthread_mutex_lock( &id_mutex );
+    /* check for free ids: ~r is all ones here, i.e. every id already reserved */
+    if (cs_id_source != ~r) {
+        /* isolate the lowest zero bit of cs_id_source */
+        x = cs_id_source + 1; /* 10111 -> 11000 */
+        r = ~cs_id_source;    /* 10111 -> 01000 (low bits shown) */
+        r = x & r;            /* x & r -> 01000 */

+        /* set id as reserved */
+        cs_id_source |= r;
+    }
+    pthread_mutex_unlock( &id_mutex );
+    return r;
+}
+
+/**
+ * Free the id for later reuse by clearing its bit(s) in cs_id_source.
+ **/
+static void free_id(unsigned long id)
+{
+    pthread_mutex_lock( &id_mutex );

+    cs_id_source &= ~id; /* id == 0 (the generate_id failure value) changes nothing */

+    pthread_mutex_unlock( &id_mutex );
+}
+
 static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm,
                                        uint32_t ndw)
 {
@@ -90,6 +129,7 @@ static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm,
     }
     csg->base.relocs_total_size = 0;
     csg->base.crelocs = 0;
+    csg->id = generate_id();
     csg->nrelocs = 4096 / (4 * 4) ;
     csg->relocs_bo = (struct radeon_bo_int**)calloc(1,
                                                 csg->nrelocs*sizeof(void*));
@@ -141,38 +181,43 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs,
     if (write_domain == RADEON_GEM_DOMAIN_CPU) {
         return -EINVAL;
     }
-    /* check if bo is already referenced */
-    for(i = 0; i < cs->crelocs; i++) {
-        idx = i * RELOC_SIZE;
-        reloc = (struct cs_reloc_gem*)&csg->relocs[idx];
-        if (reloc->handle == bo->handle) {
-            /* Check domains must be in read or write. As we check already
-             * checked that in argument one of the read or write domain was
-             * set we only need to check that if previous reloc as the read
-             * domain set then the read_domain should also be set for this
-             * new relocation.
-             */
-            /* the DDX expects to read and write from same pixmap */
-            if (write_domain && (reloc->read_domain & write_domain)) {
-                reloc->read_domain = 0;
-                reloc->write_domain = write_domain;
-            } else if (read_domain & reloc->write_domain) {
-                reloc->read_domain = 0;
-            } else {
-                if (write_domain != reloc->write_domain)
-                    return -EINVAL;
-                if (read_domain != reloc->read_domain)
-                    return -EINVAL;
+    /* use bit field hash function to determine
+       if this bo is for sure not in this cs. */
+    if ((boi->referenced_in_cs & csg->id)) {
+        /* check if bo is already referenced */
+        for(i = cs->crelocs; i != 0;) {
+            --i;
+            idx = i * RELOC_SIZE;
+            reloc = (struct cs_reloc_gem*)&csg->relocs[idx];
+            if (reloc->handle == bo->handle) {
+                /* Check domains must be in read or write. As we check already
+                 * checked that in argument one of the read or write domain was
+                 * set we only need to check that if previous reloc as the read
+                 * domain set then the read_domain should also be set for this
+                 * new relocation.
+                 */
+                /* the DDX expects to read and write from same pixmap */
+                if (write_domain && (reloc->read_domain & write_domain)) {
+                    reloc->read_domain = 0;
+                    reloc->write_domain = write_domain;
+                } else if (read_domain & reloc->write_domain) {
+                    reloc->read_domain = 0;
+                } else {
+                    if (write_domain != reloc->write_domain)
+                        return -EINVAL;
+                    if (read_domain != reloc->read_domain)
+                        return -EINVAL;
+                }
+
+                reloc->read_domain |= read_domain;
+                reloc->write_domain |= write_domain;
+                /* update flags */
+                reloc->flags |= (flags & reloc->flags);
+                /* write relocation packet */
+                radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
+                radeon_cs_write_dword((struct radeon_cs *)cs, idx);
+                return 0;
             }
-
-            reloc->read_domain |= read_domain;
-            reloc->write_domain |= write_domain;
-            /* update flags */
-            reloc->flags |= (flags & reloc->flags);
-            /* write relocation packet */
-            radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
-            radeon_cs_write_dword((struct radeon_cs *)cs, idx);
-            return 0;
         }
     }
     /* new relocation */
@@ -203,6 +248,7 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs,
     reloc->flags = flags;
     csg->chunks[1].length_dw += RELOC_SIZE;
     radeon_bo_ref(bo);
+    boi->referenced_in_cs |= csg->id;
     cs->relocs_total_size += boi->size;
     radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
     radeon_cs_write_dword((struct radeon_cs *)cs, idx);
@@ -288,6 +334,7 @@ static int cs_gem_emit(struct radeon_cs_int *cs)
                             &csg->cs, sizeof(struct drm_radeon_cs));
     for (i = 0; i < csg->base.crelocs; i++) {
         csg->relocs_bo[i]->space_accounted = 0;
+        csg->relocs_bo[i]->referenced_in_cs &= ~csg->id;
         radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]);
         csg->relocs_bo[i] = NULL;
     }
@@ -302,6 +349,7 @@ static int cs_gem_destroy(struct radeon_cs_int *cs)
 {
     struct cs_gem *csg = (struct cs_gem*)cs;
 
+    free_id(csg->id);
     free(csg->relocs_bo);
     free(cs->relocs);
     free(cs->packets);
@@ -317,6 +365,7 @@ static int cs_gem_erase(struct radeon_cs_int *cs)
     if (csg->relocs_bo) {
         for (i = 0; i < csg->base.crelocs; i++) {
             if (csg->relocs_bo[i]) {
+                csg->relocs_bo[i]->referenced_in_cs &= ~csg->id;
                 radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]);
                 csg->relocs_bo[i] = NULL;
             }
-- 
1.6.3.3


------------------------------------------------------------------------------
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

Reply via email to