bo-referenced_in_cs is checked if bo is already in cs. Adding and removing
reference in bo is done with atomic operations to allow parallel access to a
bo from multiple contexts.
cs-id generation code quarentees there is not duplicated ids which limits
number of cs-ids to 32. If there is more cs objects rest will get id 0.
This optimization decreases cs_write_reloc share of torcs profiling from 4.3%
to 2.6%.
Signed-off-by: Pauli Nieminen suok...@gmail.com
---
radeon/radeon_bo_gem.c |1 +
radeon/radeon_bo_int.h |4 +-
radeon/radeon_cs.c |6 ++
radeon/radeon_cs.h |2 +-
radeon/radeon_cs_gem.c | 123
radeon/radeon_cs_int.h |1 +
xf86atomic.h |6 ++
7 files changed, 110 insertions(+), 33 deletions(-)
diff --git a/radeon/radeon_bo_gem.c b/radeon/radeon_bo_gem.c
index bc8058d..fb51f4a 100644
--- a/radeon/radeon_bo_gem.c
+++ b/radeon/radeon_bo_gem.c
@@ -80,6 +80,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager
*bom,
bo-base.domains = domains;
bo-base.flags = flags;
bo-base.ptr = NULL;
+atomic_set(bo-base.referenced_in_cs, 0);
bo-map_count = 0;
if (handle) {
struct drm_gem_open open_arg;
diff --git a/radeon/radeon_bo_int.h b/radeon/radeon_bo_int.h
index 9589ead..9c0ae68 100644
--- a/radeon/radeon_bo_int.h
+++ b/radeon/radeon_bo_int.h
@@ -1,6 +1,8 @@
#ifndef RADEON_BO_INT
#define RADEON_BO_INT
+#include xf86atomic.h
+
struct radeon_bo_manager {
struct radeon_bo_funcs *funcs;
int fd;
@@ -17,7 +19,7 @@ struct radeon_bo_int {
unsignedcref;
struct radeon_bo_manager*bom;
uint32_tspace_accounted;
-uint32_treferenced_in_cs;
+atomic_treferenced_in_cs;
};
/* bo functions */
diff --git a/radeon/radeon_cs.c b/radeon/radeon_cs.c
index cc9be39..d0e922b 100644
--- a/radeon/radeon_cs.c
+++ b/radeon/radeon_cs.c
@@ -88,3 +88,9 @@ void radeon_cs_space_set_flush(struct radeon_cs *cs, void
(*fn)(void *), void *d
csi-space_flush_fn = fn;
csi-space_flush_data = data;
}
+
+uint32_t radeon_cs_get_id(struct radeon_cs *cs)
+{
+struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+return csi-id;
+}
diff --git a/radeon/radeon_cs.h b/radeon/radeon_cs.h
index 49d5d9a..7f6ee68 100644
--- a/radeon/radeon_cs.h
+++ b/radeon/radeon_cs.h
@@ -85,7 +85,7 @@ extern int radeon_cs_write_reloc(struct radeon_cs *cs,
uint32_t read_domain,
uint32_t write_domain,
uint32_t flags);
-
+extern uint32_t radeon_cs_get_id(struct radeon_cs *cs);
/*
* add a persistent BO to the list
* a persistent BO is one that will be referenced across flushes,
diff --git a/radeon/radeon_cs_gem.c b/radeon/radeon_cs_gem.c
index 45a219c..ef5d3d5 100644
--- a/radeon/radeon_cs_gem.c
+++ b/radeon/radeon_cs_gem.c
@@ -32,6 +32,7 @@
#include assert.h
#include errno.h
#include stdlib.h
+#include pthread.h
#include sys/mman.h
#include sys/ioctl.h
#include radeon_cs.h
@@ -41,6 +42,7 @@
#include radeon_bo_gem.h
#include drm.h
#include xf86drm.h
+#include xf86atomic.h
#include radeon_drm.h
struct radeon_cs_manager_gem {
@@ -68,6 +70,50 @@ struct cs_gem {
struct radeon_bo_int**relocs_bo;
};
+static pthread_mutex_t id_mutex = PTHREAD_MUTEX_INITIALIZER;
+static uint32_t cs_id_source = 0;
+
+/**
+ * result is undefined if called with ~0
+ */
+static uint32_t get_first_zero(const uint32_t n)
+{
+/* __builtin_ctz returns number of trailing zeros. */
+return 1 __builtin_ctz(~n);
+}
+
+/**
+ * Returns a free id for cs.
+ * If there is no free id we return zero
+ **/
+static uint32_t generate_id(void)
+{
+uint32_t r = 0;
+pthread_mutex_lock( id_mutex );
+/* check for free ids */
+if (cs_id_source != ~r) {
+/* find first zero bit */
+r = get_first_zero(cs_id_source);
+
+/* set id as reserved */
+cs_id_source |= r;
+}
+pthread_mutex_unlock( id_mutex );
+return r;
+}
+
+/**
+ * Free the id for later reuse
+ **/
+static void free_id(uint32_t id)
+{
+pthread_mutex_lock( id_mutex );
+
+cs_id_source = ~id;
+
+pthread_mutex_unlock( id_mutex );
+}
+
static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm,
uint32_t ndw)
{
@@ -90,6 +136,7 @@ static struct radeon_cs_int *cs_gem_create(struct
radeon_cs_manager *csm,
}
csg-base.relocs_total_size = 0;
csg-base.crelocs = 0;
+csg-base.id = generate_id();
csg-nrelocs = 4096 / (4 * 4) ;
csg-relocs_bo = (struct radeon_bo_int**)calloc(1,
csg-nrelocs*sizeof(void*));
@@ -141,38 +188,45 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs,
if (write_domain == RADEON_GEM_DOMAIN_CPU) {
return