This code is based on my hardware-generated binding table code for Mesa,
adapted to the i965_composite path in UXA.

Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com>
---
 src/uxa/i965_3d.c           |    5 ++-
 src/uxa/i965_reg.h          |    8 +++++
 src/uxa/i965_render.c       |   78 +++++++++++++++++++++++++++++++++++--------
 src/uxa/intel_batchbuffer.c |    7 ++--
 4 files changed, 80 insertions(+), 18 deletions(-)

diff --git a/src/uxa/i965_3d.c b/src/uxa/i965_3d.c
index 757a979..afbb5a7 100644
--- a/src/uxa/i965_3d.c
+++ b/src/uxa/i965_3d.c
@@ -406,7 +406,10 @@ gen7_upload_binding_table(intel_screen_private *intel,
                          uint32_t ps_binding_table_offset)
 {
        OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
-       OUT_BATCH(ps_binding_table_offset);
+       if (intel->use_resource_streamer)
+               OUT_BATCH(ps_binding_table_offset >> 1);
+       else
+               OUT_BATCH(ps_binding_table_offset);
 }
 
 void
diff --git a/src/uxa/i965_reg.h b/src/uxa/i965_reg.h
index a934a67..157b212 100644
--- a/src/uxa/i965_reg.h
+++ b/src/uxa/i965_reg.h
@@ -296,6 +296,14 @@
 /* DW1 */
 # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
 
+/* GEN7+ resource streamer */
+#define GEN7_3DSTATE_BINDING_TABLE_POOL_ALLOC           BRW_3D(3, 1, 0x19)
+# define BINDING_TABLE_POOL_ENABLE              0x0860
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_VS              BRW_3D(3, 0, 0x43)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_GS              BRW_3D(3, 0, 0x44)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_HS              BRW_3D(3, 0, 0x45)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_DS              BRW_3D(3, 0, 0x46)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_PS              BRW_3D(3, 0, 0x47)
 
 #define PIPELINE_SELECT_3D             0
 #define PIPELINE_SELECT_MEDIA          1
diff --git a/src/uxa/i965_render.c b/src/uxa/i965_render.c
index 74f57af..d5225dd 100644
--- a/src/uxa/i965_render.c
+++ b/src/uxa/i965_render.c
@@ -1783,6 +1783,10 @@ static void i965_surface_flush(struct intel_screen_private *intel)
                                   sizeof(intel->surface_data), 4096);
        assert(intel->surface_bo);
 
+       drm_intel_bo_unreference(intel->hw_bt_pool_bo);
+       intel->hw_bt_pool_bo = drm_intel_bo_alloc(intel->bufmgr, "hw_bt",
+                                                 131072, 4096);
+       assert(intel->hw_bt_pool_bo);
        return;
        (void)ret;
 }
@@ -2217,32 +2221,70 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
 static void i965_bind_surfaces(struct intel_screen_private *intel)
 {
        uint32_t *binding_table;
+       uint32_t surf0 = 0, surf1 = 0, surf2 = 0;
 
        assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data));
 
-       binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
-       intel->surface_table = intel->surface_used;
-       intel->surface_used += SURFACE_STATE_PADDED_SIZE;
-
-       binding_table[0] =
-               i965_set_picture_surface_state(intel,
+       surf0 = i965_set_picture_surface_state(intel,
                                               intel->render_dest_picture,
                                               intel->render_dest,
                                               TRUE);
-       binding_table[1] =
-               i965_set_picture_surface_state(intel,
+       surf1 = i965_set_picture_surface_state(intel,
                                               intel->render_source_picture,
                                               intel->render_source,
                                               FALSE);
        if (intel->render_mask) {
-               binding_table[2] =
-                       i965_set_picture_surface_state(intel,
-                                                      intel->render_mask_picture,
-                                                      intel->render_mask,
-                                                      FALSE);
+               surf2  = i965_set_picture_surface_state(intel,
+                                                       intel->render_mask_picture,
+                                                       intel->render_mask,
+                                                       FALSE);
+       }
+
+       if (intel->use_resource_streamer) {
+               intel->surface_table += (256 * sizeof(uint16_t));
+               OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_EDIT_PS | (5 - 2));
+               OUT_BATCH(0x3);
+               {
+                       OUT_BATCH(0 << 16 | surf0 >> 5);
+                       OUT_BATCH(1 << 16 | surf1 >> 5);
+                       OUT_BATCH(2 << 16 | surf2 >> 5);
+               }
+       } else {
+               binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
+               intel->surface_table = intel->surface_used;
+               intel->surface_used += SURFACE_STATE_PADDED_SIZE;
+
+               binding_table[0] = surf0;
+               binding_table[1] = surf1;
+               binding_table[2] = surf2;
        }
 }
 
+static void i965_enable_hw_binding_table(struct intel_screen_private *intel)
+{
+       if (!intel->use_resource_streamer)
+               return;
+
+       OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
+       OUT_RELOC(intel->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                 BINDING_TABLE_POOL_ENABLE);
+       OUT_RELOC(intel->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                 intel->hw_bt_pool_bo->size);
+
+       OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
+       OUT_BATCH(BRW_PIPE_CONTROL_GLOBAL_GTT);
+       OUT_BATCH(0); /* address */
+       OUT_BATCH(0); /* write data */
+
+       /* Do a block clear of existing on-chip binding table entries
+          that might be left over from the old batch. Otherwise, the
+          stale entries cause GPU hangs.
+       */
+       OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_EDIT_PS | (3 - 2));
+       OUT_BATCH(0xffff << 16 | 0x3 );
+       OUT_BATCH(0);
+}
+
 void
 i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
               int dstX, int dstY, int w, int h)
@@ -2252,6 +2294,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 
        intel_batch_start_atomic(scrn, 200);
        if (intel->needs_render_state_emit) {
+               i965_enable_hw_binding_table(intel);
                i965_bind_surfaces(intel);
 
                if (INTEL_INFO(intel)->gen >= 060)
@@ -2349,6 +2392,8 @@ void gen4_render_state_init(ScrnInfoPtr scrn)
                drm_intel_bo_alloc(intel->bufmgr, "surface data",
                                   sizeof(intel->surface_data), 4096);
        assert(intel->surface_bo);
+       intel->hw_bt_pool_bo = drm_intel_bo_alloc(intel->bufmgr, "hw_bt",
+                                                 131072, 4096);
 
        intel->surface_used = 0;
 
@@ -2445,6 +2490,7 @@ void gen4_render_state_cleanup(ScrnInfoPtr scrn)
        int i, j, k, l, m;
 
        drm_intel_bo_unreference(intel->surface_bo);
+       drm_intel_bo_unreference(intel->hw_bt_pool_bo);
        drm_intel_bo_unreference(render_state->vs_state_bo);
        drm_intel_bo_unreference(render_state->sf_state_bo);
        drm_intel_bo_unreference(render_state->sf_mask_state_bo);
@@ -2571,9 +2617,13 @@ gen6_composite_create_depth_stencil_state(intel_screen_private *intel)
        (void)ret;
 }
 
+#define MI_RS_CONTROL                           (0x6 << 23)
+
 static void
 gen6_composite_state_base_address(intel_screen_private *intel)
 {
+       OUT_BATCH(MI_RS_CONTROL | 0x0);
+
        OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
        OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */
        intel->surface_reloc = intel->batch_used;
@@ -2586,6 +2636,8 @@ gen6_composite_state_base_address(intel_screen_private *intel)
        OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
        OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
        OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
+
+       OUT_BATCH(MI_RS_CONTROL | 0x1);
 }
 
 static void
diff --git a/src/uxa/intel_batchbuffer.c b/src/uxa/intel_batchbuffer.c
index dedf7f8..347413b 100644
--- a/src/uxa/intel_batchbuffer.c
+++ b/src/uxa/intel_batchbuffer.c
@@ -260,13 +260,12 @@ void intel_batch_submit(ScrnInfoPtr scrn)
        }
 
        ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr);
+       uint32_t flags = HAS_BLT(intel) ? intel->current_batch: I915_EXEC_DEFAULT;
+       flags |= intel->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0;
        if (ret == 0) {
                ret = drm_intel_bo_mrb_exec(intel->batch_bo,
                                intel->batch_used*4,
-                               NULL, 0, 0xffffffff,
-                               (HAS_BLT(intel) ?
-                                intel->current_batch:
-                                I915_EXEC_DEFAULT));
+                               NULL, 0, 0xffffffff, flags);
        }
 
        if (ret != 0) {
-- 
1.7.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to