Instead of building the batch directly in memory, build it into separate
cmd and state arrays. This representation gives us more flexibility in
expressing batch state and in batch generation/relocation.

As a bonus, we can also attach the line information that produced the
batch data to help debugging.

The generated output state is unchanged. This is a preparatory patch
for introducing the gen8 golden state.

Signed-off-by: Mika Kuoppala <mika.kuopp...@intel.com>
---
 tools/null_state_gen/intel_batchbuffer.c      |  251 +++++++++++++++++--------
 tools/null_state_gen/intel_batchbuffer.h      |   86 +++++----
 tools/null_state_gen/intel_null_state_gen.c   |  100 +++-------
 tools/null_state_gen/intel_renderstate_gen6.c |  136 ++++++--------
 tools/null_state_gen/intel_renderstate_gen7.c |  126 ++++---------
 tools/null_state_gen/intel_renderstate_gen8.c |  167 ++++++----------
 6 files changed, 404 insertions(+), 462 deletions(-)

diff --git a/tools/null_state_gen/intel_batchbuffer.c 
b/tools/null_state_gen/intel_batchbuffer.c
index 62e052a..2a0b340 100644
--- a/tools/null_state_gen/intel_batchbuffer.c
+++ b/tools/null_state_gen/intel_batchbuffer.c
@@ -29,145 +29,248 @@
  **************************************************************************/
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <assert.h>
 
 #include "intel_batchbuffer.h"
 
-int intel_batch_reset(struct intel_batchbuffer *batch,
-                     void *p,
-                     uint32_t size,
-                     uint32_t off)
+void bb_area_emit(struct bb_area *a, uint32_t dword, item_type type, const 
char *str)
 {
-       batch->err = -EINVAL;
-       batch->base = batch->base_ptr = p;
-       batch->state_base = batch->state_ptr = p;
+       struct bb_item *item;
+       assert(a != NULL);
+       assert(a->num_items < MAX_ITEMS);
+       item = &a->item[a->num_items];
 
-       if (off >= size || ALIGN(off, 4) != off)
-               return -EINVAL;
+       item->data = dword;
+       item->type = type;
+       strncpy(item->str, str, MAX_STRLEN);
+       item->str[MAX_STRLEN - 1] = 0;
 
-       batch->size = size;
+       a->num_items++;
+}
 
-       batch->state_base = batch->state_ptr = &batch->base[off];
+void bb_area_emit_offset(struct bb_area *a, unsigned offset, uint32_t dword, 
item_type type, const char *str)
+{ /* Overwrite the already-emitted item at byte offset 'offset' with dword/type/str; num_items is not changed. */
+       const unsigned i = offset / 4; /* byte offset -> dword item index */
+       struct bb_item *item;
+       assert(a != NULL);
+       assert(a->num_items <= MAX_ITEMS); /* overwriting needs no free slot, so a completely full area is valid here */
+       assert(i < a->num_items); /* only items that were emitted before may be rewritten */
+       item = &a->item[i];
+
+       item->data = dword;
+       item->type = type;
+       strncpy(item->str, str, MAX_STRLEN);
+       item->str[MAX_STRLEN - 1] = 0; /* strncpy leaves the buffer unterminated when str fills it */
+}
 
-       batch->num_relocs = 0;
-       batch->err = 0;
+static struct bb_item *bb_area_get(struct bb_area *a, unsigned i) /* item number i of area a; i is a dword index, not a byte offset */
+{
+       assert (i < a->num_items); /* only items emitted so far can be fetched */
+       return &a->item[i];
+}
 
-       return batch->err;
+static unsigned bb_area_items(struct bb_area *a)
+{
+       return a->num_items;
 }
 
-uint32_t intel_batch_state_used(struct intel_batchbuffer *batch)
+static unsigned long bb_area_used(struct bb_area *a)
 {
-       return batch->state_ptr - batch->state_base;
+       assert(a != NULL);
+       assert(a->num_items <= MAX_ITEMS);
+
+       return a->num_items * 4;
 }
 
-uint32_t intel_batch_state_offset(struct intel_batchbuffer *batch)
+static unsigned long bb_area_room(struct bb_area *a)
 {
-       return batch->state_ptr - batch->base;
+       assert (a != NULL);
+       assert (a->num_items <= MAX_ITEMS);
+
+       return (MAX_ITEMS - a->num_items) * 4;
 }
 
-void *intel_batch_state_alloc(struct intel_batchbuffer *batch,
-                             uint32_t size,
-                             uint32_t align)
+struct intel_batchbuffer *intel_batchbuffer_create(void)
 {
-       uint32_t cur;
-       uint32_t offset;
+       struct intel_batchbuffer *batch;
 
-       if (batch->err)
+       batch = calloc(1, sizeof(*batch));
+       if (batch == NULL)
                return NULL;
 
-       cur  = intel_batch_state_offset(batch);
-       offset = ALIGN(cur, align);
+       batch->cmds = calloc(1, sizeof(struct bb_area));
+       if (batch->cmds == NULL) {
+               free(batch);
+               return NULL;
+       }
 
-       if (offset + size > batch->size) {
-               batch->err = -ENOSPC;
+       batch->state = calloc(1, sizeof(struct bb_area));
+       if (batch->state == NULL) {
+               free(batch->cmds);
+               free(batch);
                return NULL;
        }
 
-       batch->state_ptr = batch->base + offset + size;
+       batch->state_start_offset = -1;
+       batch->cmds_end_offset = -1;
 
-       memset(batch->base + cur, 0, size);
+       return batch;
+}
 
-       return batch->base + offset;
+static void bb_area_align(struct bb_area *a, unsigned align) /* pad area a until its used size in bytes is a multiple of align */
+{
+       if (align == 0) /* 0 means "no alignment requested" */
+               return;
+
+       assert((align % 4) == 0); /* padding is added one 4-byte item at a time */
+
+       while ((a->num_items * 4) % align != 0)
+               bb_area_emit(a, 0, PAD, "align pad"); /* each pad is a zero dword tagged PAD */
 }
 
-int intel_batch_offset(struct intel_batchbuffer *batch, const void *ptr)
+static int reloc_exists(struct intel_batchbuffer *batch, uint32_t offset)
 {
-       return (uint8_t *)ptr - batch->base;
+       int i;
+
+       for (i = 0; i < batch->cmds->num_items; i++)
+               if ((batch->cmds->item[i].type == RELOC ||
+                    batch->cmds->item[i].type == RELOC_STATE) &&
+                   i * 4 == offset)
+                       return 1;
+
+       return 0;
 }
 
-int intel_batch_state_copy(struct intel_batchbuffer *batch,
-                          const void *ptr,
-                          const uint32_t size,
-                          const uint32_t align)
+int intel_batch_is_reloc(struct intel_batchbuffer *batch, unsigned i)
 {
-       void * const p = intel_batch_state_alloc(batch, size, align);
+       return reloc_exists(batch, i * 4);
+}
 
-       if (p == NULL)
-               return -1;
+static void intel_batch_cmd_align(struct intel_batchbuffer *batch, unsigned 
align)
+{
+       bb_area_align(batch->cmds, align);
+}
 
-       return intel_batch_offset(batch, memcpy(p, ptr, size));
+static void intel_batch_state_align(struct intel_batchbuffer *batch, unsigned 
align)
+{
+       bb_area_align(batch->state, align);
 }
 
-uint32_t intel_batch_cmds_used(struct intel_batchbuffer *batch)
+unsigned intel_batch_num_cmds(struct intel_batchbuffer *batch)
 {
-       return batch->base_ptr - batch->base;
+       return bb_area_items(batch->cmds);
 }
 
-uint32_t intel_batch_total_used(struct intel_batchbuffer *batch)
+static unsigned intel_batch_num_state(struct intel_batchbuffer *batch)
 {
-       return batch->state_ptr - batch->base;
+       return bb_area_items(batch->state);
 }
 
-static uint32_t intel_batch_space(struct intel_batchbuffer *batch)
+struct bb_item *intel_batch_cmd_get(struct intel_batchbuffer *batch, unsigned 
i)
 {
-       return batch->state_base - batch->base_ptr;
+       return bb_area_get(batch->cmds, i);
 }
 
-int intel_batch_emit_dword(struct intel_batchbuffer *batch, uint32_t dword)
+struct bb_item *intel_batch_state_get(struct intel_batchbuffer *batch, 
unsigned i)
 {
-       uint32_t offset;
+       return bb_area_get(batch->state, i);
+}
 
-       if (batch->err)
-               return -1;
+uint32_t intel_batch_state_offset(struct intel_batchbuffer *batch, unsigned 
align)
+{
+       intel_batch_state_align(batch, align);
+       return bb_area_used(batch->state);
+}
 
-       if (intel_batch_space(batch) < 4) {
-               batch->err = -ENOSPC;
-               return -1;
-       }
+uint32_t intel_batch_state_alloc(struct intel_batchbuffer *batch, unsigned 
bytes, unsigned align,
+                                const char *str)
+{
+       unsigned offset;
+       unsigned dwords = bytes/4;
+       assert ((bytes % 4) == 0);
+       assert (bb_area_room(batch->state) >= bytes);
 
-       offset = intel_batch_offset(batch, batch->base_ptr);
+       offset = intel_batch_state_offset(batch, align);
 
-       *(uint32_t *) (batch->base_ptr) = dword;
-       batch->base_ptr += 4;
+       while (dwords--)
+               bb_area_emit(batch->state, 0, UNINITIALIZED, str);
 
        return offset;
 }
 
-int intel_batch_emit_reloc(struct intel_batchbuffer *batch,
-                          const uint32_t delta)
+uint32_t intel_batch_state_copy(struct intel_batchbuffer *batch,
+                               void *d, unsigned bytes,
+                               unsigned align,
+                               const char *str) /* append 'bytes' of d to the state area as STATE dwords; returns the aligned byte offset where the copy starts */
 {
-       uint32_t offset;
+       unsigned offset;
+       unsigned i;
+       unsigned dwords = bytes/4;
+       assert (d);
+       assert ((bytes % 4) == 0); /* data is stored as whole dwords */
+       assert (bb_area_room(batch->state) >= bytes);
+
+       offset = intel_batch_state_offset(batch, align); /* pads the state area to 'align' first */
 
-       if (batch->err)
-               return -1;
+       for (i = 0; i < dwords; i++) {
+               char offsetinside[80];
+               snprintf(offsetinside, sizeof(offsetinside), "%s: 0x%x", str, i * 4); /* bounded: a long label is truncated instead of overflowing the stack buffer */
 
-       if (delta >= batch->size) {
-               batch->err = -EINVAL;
-               return -1;
+               uint32_t *s = (uint32_t *)(uint8_t *)d + i; /* i-th dword of the source data */
+               bb_area_emit(batch->state, *s, STATE, offsetinside);
        }
 
-       offset = intel_batch_emit_dword(batch, delta);
+       return offset;
+}
+
+void intel_batch_relocate_state(struct intel_batchbuffer *batch)
+{
+       unsigned int i;
+
+       assert (batch->state_start_offset == -1);
 
-       if (batch->err)
-               return -1;
+       batch->cmds_end_offset = bb_area_used(batch->cmds) - 4;
 
-       if (batch->num_relocs >= MAX_RELOCS) {
-               batch->err = -ENOSPC;
-               return -1;
+       /* Hardcoded, could track max align done also */
+       intel_batch_cmd_align(batch, 64);
+
+       batch->state_start_offset = bb_area_used(batch->cmds);
+
+       for (i = 0; i < bb_area_items(batch->state); i++) {
+               const struct bb_item *s = bb_area_get(batch->state, i);
+
+               bb_area_emit(batch->cmds, s->data, s->type, s->str);
        }
 
-       batch->relocs[batch->num_relocs++] = offset;
+       for (i = 0; i < bb_area_items(batch->cmds); i++) {
+               struct bb_item *s = bb_area_get(batch->cmds, i);
 
-       return offset;
+               if (s->type == STATE_OFFSET || s->type == RELOC_STATE)
+                       s->data += batch->state_start_offset;
+       }
+}
+
+const char *intel_batch_type_as_str(const struct bb_item *item) /* printable name of item->type; returns a string literal, nothing to free */
+{
+       switch (item->type) {
+       case UNINITIALIZED:
+               return "UNINITIALIZED";
+       case CMD:
+               return "CMD";
+       case STATE:
+               return "STATE";
+       case PAD:
+               return "PAD";
+       case RELOC:
+               return "RELOC";
+       case RELOC_STATE:
+               return "RELOC_STATE";
+       case STATE_OFFSET:
+               return "STATE_OFFSET";
+       }
+
+       return "UNKNOWN"; /* reached only for a value that is not one of the enumerators above */
 }
diff --git a/tools/null_state_gen/intel_batchbuffer.h 
b/tools/null_state_gen/intel_batchbuffer.h
index f5c29db..e44c5c9 100644
--- a/tools/null_state_gen/intel_batchbuffer.h
+++ b/tools/null_state_gen/intel_batchbuffer.h
@@ -34,58 +34,64 @@
 #include <stdint.h>
 
 #define MAX_RELOCS 64
+#define MAX_ITEMS 4096
+#define MAX_STRLEN 256
+
 #define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
 
-struct intel_batchbuffer {
-       int err;
-       uint8_t *base;
-       uint8_t *base_ptr;
-       uint8_t *state_base;
-       uint8_t *state_ptr;
-       int size;
-
-       uint32_t relocs[MAX_RELOCS];
-       uint32_t num_relocs;
+typedef enum { /* tag stored with every emitted dword (struct bb_item.type) */
+       UNINITIALIZED, /* zero dword reserved by intel_batch_state_alloc() */
+       CMD, /* command dword emitted with OUT_BATCH() */
+       STATE, /* state dword from OUT_STATE() or intel_batch_state_copy() */
+       RELOC, /* listed as a relocation; data is left as emitted */
+       RELOC_STATE, /* listed as a relocation; data also gets state_start_offset added */
+       STATE_OFFSET, /* data gets state_start_offset added in intel_batch_relocate_state() */
+       PAD, /* zero dword inserted by alignment */
+} item_type;
+
+struct bb_item {
+       uint32_t data;
+       item_type type;
+       char str[MAX_STRLEN];
 };
 
-#define OUT_BATCH(d) intel_batch_emit_dword(batch, d)
-#define OUT_RELOC(batch, read_domains, write_domain, delta) \
-       intel_batch_emit_reloc(batch, delta)
-
-int intel_batch_reset(struct intel_batchbuffer *batch,
-                      void *p,
-                      uint32_t size, uint32_t split_off);
-
-uint32_t intel_batch_state_used(struct intel_batchbuffer *batch);
+struct bb_area {
+       struct bb_item item[MAX_ITEMS];
+       unsigned long num_items;
+};
 
-void *intel_batch_state_alloc(struct intel_batchbuffer *batch,
-                             uint32_t size,
-                             uint32_t align);
+struct intel_batchbuffer {
+       struct bb_area *cmds;
+       struct bb_area *state;
+       unsigned long cmds_end_offset;
+       unsigned long state_start_offset;
+};
 
-int intel_batch_offset(struct intel_batchbuffer *batch, const void *ptr);
+struct intel_batchbuffer *intel_batchbuffer_create(void);
 
-int intel_batch_state_copy(struct intel_batchbuffer *batch,
-                          const void *ptr,
-                          const uint32_t size,
-                          const uint32_t align);
+#define OUT_BATCH(d) bb_area_emit(batch->cmds, d, CMD, #d)
+#define OUT_BATCH_STATE_OFFSET(d) bb_area_emit(batch->cmds, d, STATE_OFFSET, 
#d)
+#define OUT_RELOC(batch, read_domain, write_domain, d) 
bb_area_emit(batch->cmds, d, RELOC, #d)
+#define OUT_RELOC_STATE(batch, read_domain, write_domain, d) 
bb_area_emit(batch->cmds, d, RELOC_STATE, #d) /* emits d into cmds tagged RELOC_STATE; no trailing ';' so it expands to a single expression like the other OUT_* macros */
+#define OUT_STATE(d) bb_area_emit(batch->state, d, STATE, #d)
+#define OUT_STATE_OFFSET(offset) bb_area_emit(batch->state, offset, 
STATE_OFFSET, #offset)
+#define OUT_STATE_STRUCT(name, align) intel_batch_state_copy(batch, &name, 
sizeof(name), align, #name " " #align)
 
-uint32_t intel_batch_cmds_used(struct intel_batchbuffer *batch);
+uint32_t intel_batch_state_copy(struct intel_batchbuffer *batch, void *d, 
unsigned bytes, unsigned align,
+                               const char *name);
+uint32_t intel_batch_state_alloc(struct intel_batchbuffer *batch, unsigned 
bytes, unsigned align,
+                                const char *name);
 
-int intel_batch_emit_dword(struct intel_batchbuffer *batch, uint32_t dword);
+unsigned intel_batch_num_cmds(struct intel_batchbuffer *batch);
 
-int intel_batch_emit_reloc(struct intel_batchbuffer *batch,
-                          const uint32_t delta);
+struct bb_item *intel_batch_cmd_get(struct intel_batchbuffer *batch, unsigned 
i);
+int intel_batch_is_reloc(struct intel_batchbuffer *batch, unsigned i);
 
-uint32_t intel_batch_total_used(struct intel_batchbuffer *batch);
+void intel_batch_relocate_state(struct intel_batchbuffer *batch);
 
-static inline int intel_batch_error(struct intel_batchbuffer *batch)
-{
-       return batch->err;
-}
+const char *intel_batch_type_as_str(const struct bb_item *item);
 
-static inline uint32_t intel_batch_state_start(struct intel_batchbuffer *batch)
-{
-       return batch->state_base - batch->base;
-}
+void bb_area_emit(struct bb_area *a, uint32_t dword, item_type type, const 
char *str);
+void bb_area_emit_offset(struct bb_area *a, unsigned i, uint32_t dword, 
item_type type, const char *str);
 
 #endif
diff --git a/tools/null_state_gen/intel_null_state_gen.c 
b/tools/null_state_gen/intel_null_state_gen.c
index 945926f..b337706 100644
--- a/tools/null_state_gen/intel_null_state_gen.c
+++ b/tools/null_state_gen/intel_null_state_gen.c
@@ -11,6 +11,8 @@ extern int gen6_setup_null_render_state(struct 
intel_batchbuffer *batch);
 extern int gen7_setup_null_render_state(struct intel_batchbuffer *batch);
 extern int gen8_setup_null_render_state(struct intel_batchbuffer *batch);
 
+static int debug = 0;
+
 static void print_usage(char *s)
 {
        fprintf(stderr, "%s: <gen>\n"
@@ -18,17 +20,6 @@ static void print_usage(char *s)
               s);
 }
 
-static int is_reloc(struct intel_batchbuffer *batch, uint32_t offset)
-{
-       int i;
-
-       for (i = 0; i < batch->num_relocs; i++)
-               if (batch->relocs[i] == offset)
-                       return 1;
-
-       return 0;
-}
-
 static int print_state(int gen, struct intel_batchbuffer *batch)
 {
        int i;
@@ -36,33 +27,37 @@ static int print_state(int gen, struct intel_batchbuffer 
*batch)
        printf("#include \"intel_renderstate.h\"\n\n");
 
        printf("static const u32 gen%d_null_state_relocs[] = {\n", gen);
-       for (i = 0; i < batch->num_relocs; i++) {
-               printf("\t0x%08x,\n", batch->relocs[i]);
+       for (i = 0; i < batch->cmds->num_items; i++) {
+               if (intel_batch_is_reloc(batch, i))
+                       printf("\t0x%08x,\n", i * 4);
        }
        printf("\t%d,\n", -1);
        printf("};\n\n");
 
        printf("static const u32 gen%d_null_state_batch[] = {\n", gen);
-       for (i = 0; i < batch->size; i += 4) {
-               const uint32_t *p = (void *)batch->base + i;
-               printf("\t0x%08x,", *p);
+       for (i = 0; i < intel_batch_num_cmds(batch); i++) {
+               const struct bb_item *cmd = intel_batch_cmd_get(batch, i);
+               printf("\t0x%08x,", cmd->data);
+
+               if (debug)
+                       printf("\t /* 0x%08x %s '%s' */", i * 4,
+                              intel_batch_type_as_str(cmd), cmd->str);
 
-               if (i == intel_batch_cmds_used(batch) - 4)
+               if (i * 4 == batch->cmds_end_offset)
                        printf("\t /* cmds end */");
 
-               if (i == intel_batch_state_start(batch))
-                       printf("\t /* state start */");
+               if (intel_batch_is_reloc(batch, i))
+                       printf("\t /* reloc */");
 
+               if (i * 4 == batch->state_start_offset)
+                       printf("\t /* state start */");
 
-               if (i == intel_batch_state_start(batch) +
-                   intel_batch_state_used(batch) - 4)
+               if (i == intel_batch_num_cmds(batch) - 1)
                        printf("\t /* state end */");
 
-               if (is_reloc(batch, i))
-                       printf("\t /* reloc */");
-
                printf("\n");
        }
+
        printf("};\n\nRO_RENDERSTATE(%d);\n", gen);
 
        return 0;
@@ -70,23 +65,14 @@ static int print_state(int gen, struct intel_batchbuffer 
*batch)
 
 static int do_generate(int gen)
 {
-       int initial_size = 8192;
-       struct intel_batchbuffer batch;
-       void *p;
+       struct intel_batchbuffer *batch;
        int ret = -EINVAL;
-       uint32_t cmd_len, state_len, size;
        int (*null_state_gen)(struct intel_batchbuffer *batch) = NULL;
 
-       p = malloc(initial_size);
-       if (p == NULL)
+       batch = intel_batchbuffer_create();
+       if (batch == NULL)
                return -ENOMEM;
 
-       assert(ALIGN(initial_size/2, STATE_ALIGN) == initial_size/2);
-
-       ret = intel_batch_reset(&batch, p, initial_size, initial_size/2);
-       if (ret)
-               goto out;
-
        switch (gen) {
        case 6:
                null_state_gen = gen6_setup_null_render_state;
@@ -103,50 +89,26 @@ static int do_generate(int gen)
 
        if (null_state_gen == NULL) {
                printf("no generator found for %d\n", gen);
-               ret = -EINVAL;
-               goto out;
+               return -EINVAL;
        }
 
-       ret = null_state_gen(&batch);
-       if (ret < 0)
-               goto out;
-
-       cmd_len = intel_batch_cmds_used(&batch);
-       state_len = intel_batch_state_used(&batch);
-
-       size = cmd_len + state_len + ALIGN(cmd_len, STATE_ALIGN) - cmd_len;
-
-       ret = intel_batch_reset(&batch, p, size, ALIGN(cmd_len, STATE_ALIGN));
-       if (ret)
-               goto out;
+       null_state_gen(batch);
+       intel_batch_relocate_state(batch);
 
-       ret = null_state_gen(&batch);
-       if (ret < 0)
-               goto out;
+       ret = print_state(gen, batch);
 
-       assert(cmd_len == intel_batch_cmds_used(&batch));
-       assert(state_len == intel_batch_state_used(&batch));
-       assert(size == ret);
-
-       /* Batch buffer needs to end */
-       assert(*(uint32_t *)(p + cmd_len - 4) == (0xA << 23));
-
-       ret = print_state(gen, &batch);
-out:
-       free(p);
-
-       if (ret < 0)
-               return ret;
-
-       return 0;
+       return ret;
 }
 
 int main(int argc, char *argv[])
 {
-       if (argc != 2) {
+       if (argc < 2) {
                print_usage(argv[0]);
                return 1;
        }
 
+       if (argc > 2)
+               debug = 1;
+
        return do_generate(atoi(argv[1]));
 }
diff --git a/tools/null_state_gen/intel_renderstate_gen6.c 
b/tools/null_state_gen/intel_renderstate_gen6.c
index f169d02..5f922f7 100644
--- a/tools/null_state_gen/intel_renderstate_gen6.c
+++ b/tools/null_state_gen/intel_renderstate_gen6.c
@@ -33,31 +33,23 @@ static const uint32_t ps_kernel_nomask_affine[][4] = {
 static uint32_t
 gen6_bind_buf_null(struct intel_batchbuffer *batch)
 {
-       struct gen6_surface_state *ss;
-       int ret;
+       struct gen6_surface_state ss;
+       memset(&ss, 0, sizeof(ss));
 
-       ss = intel_batch_state_alloc(batch, sizeof(*ss), 32);
-       if (ss == NULL)
-               return -1;
-
-       memset(ss, 0, sizeof(*ss));
-
-       return intel_batch_offset(batch, ss);
+       return OUT_STATE_STRUCT(ss, 32);
 }
 
 static uint32_t
 gen6_bind_surfaces(struct intel_batchbuffer *batch)
 {
-       uint32_t *binding_table;
+       unsigned offset;
 
-       binding_table = intel_batch_state_alloc(batch, 32, 32);
-       if (binding_table == NULL)
-               return -1;
+       offset = intel_batch_state_alloc(batch, 32, 32, "bind surfaces");
 
-       binding_table[0] = gen6_bind_buf_null(batch);
-       binding_table[1] = gen6_bind_buf_null(batch);
+       bb_area_emit_offset(batch->state, offset, gen6_bind_buf_null(batch), 
STATE_OFFSET, "bind 1");
+       bb_area_emit_offset(batch->state, offset + 4, 
gen6_bind_buf_null(batch), STATE_OFFSET, "bind 2");
 
-       return intel_batch_offset(batch, binding_table);
+       return offset;
 }
 
 static void
@@ -108,7 +100,7 @@ gen6_emit_viewports(struct intel_batchbuffer *batch, 
uint32_t cc_vp)
                  (4 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
-       OUT_BATCH(cc_vp);
+       OUT_BATCH_STATE_OFFSET(cc_vp);
 }
 
 static void
@@ -202,7 +194,7 @@ static void
 gen6_emit_cc(struct intel_batchbuffer *batch, uint32_t blend)
 {
        OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
-       OUT_BATCH(blend | 1);
+       OUT_BATCH_STATE_OFFSET(blend | 1);
        OUT_BATCH(1024 | 1);
        OUT_BATCH(1024 | 1);
 }
@@ -215,7 +207,7 @@ gen6_emit_sampler(struct intel_batchbuffer *batch, uint32_t 
state)
                  (4 - 2));
        OUT_BATCH(0); /* VS */
        OUT_BATCH(0); /* GS */
-       OUT_BATCH(state);
+       OUT_BATCH_STATE_OFFSET(state);
 }
 
 static void
@@ -249,7 +241,7 @@ static void
 gen6_emit_wm(struct intel_batchbuffer *batch, int kernel)
 {
        OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
-       OUT_BATCH(kernel);
+       OUT_BATCH_STATE_OFFSET(kernel);
        OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
                  2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
        OUT_BATCH(0);
@@ -271,7 +263,7 @@ gen6_emit_binding_table(struct intel_batchbuffer *batch, 
uint32_t wm_table)
                  (4 - 2));
        OUT_BATCH(0);           /* vs */
        OUT_BATCH(0);           /* gs */
-       OUT_BATCH(wm_table);
+       OUT_BATCH_STATE_OFFSET(wm_table);
 }
 
 static void
@@ -325,36 +317,32 @@ gen6_emit_vertex_elements(struct intel_batchbuffer *batch)
 static uint32_t
 gen6_create_cc_viewport(struct intel_batchbuffer *batch)
 {
-       struct gen6_cc_viewport *vp;
+       struct gen6_cc_viewport vp;
 
-       vp = intel_batch_state_alloc(batch, sizeof(*vp), 32);
-       if (vp == NULL)
-               return -1;
+       memset(&vp, 0, sizeof(vp));
 
-       vp->min_depth = -1.e35;
-       vp->max_depth = 1.e35;
+       vp.min_depth = -1.e35;
+       vp.max_depth = 1.e35;
 
-       return intel_batch_offset(batch, vp);
+       return OUT_STATE_STRUCT(vp, 32);
 }
 
 static uint32_t
 gen6_create_cc_blend(struct intel_batchbuffer *batch)
 {
-       struct gen6_blend_state *blend;
+       struct gen6_blend_state blend;
 
-       blend = intel_batch_state_alloc(batch, sizeof(*blend), 64);
-       if (blend == NULL)
-               return -1;
+       memset(&blend, 0, sizeof(blend));
 
-       blend->blend0.dest_blend_factor = GEN6_BLENDFACTOR_ZERO;
-       blend->blend0.source_blend_factor = GEN6_BLENDFACTOR_ONE;
-       blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
-       blend->blend0.blend_enable = 1;
+       blend.blend0.dest_blend_factor = GEN6_BLENDFACTOR_ZERO;
+       blend.blend0.source_blend_factor = GEN6_BLENDFACTOR_ONE;
+       blend.blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
+       blend.blend0.blend_enable = 1;
 
-       blend->blend1.post_blend_clamp_enable = 1;
-       blend->blend1.pre_blend_clamp_enable = 1;
+       blend.blend1.post_blend_clamp_enable = 1;
+       blend.blend1.pre_blend_clamp_enable = 1;
 
-       return intel_batch_offset(batch, blend);
+       return OUT_STATE_STRUCT(blend, 64);
 }
 
 static uint32_t
@@ -362,7 +350,7 @@ gen6_create_kernel(struct intel_batchbuffer *batch)
 {
        return intel_batch_state_copy(batch, ps_kernel_nomask_affine,
                                      sizeof(ps_kernel_nomask_affine),
-                                     64);
+                                     64, "ps_kernel");
 }
 
 static uint32_t
@@ -370,70 +358,64 @@ gen6_create_sampler(struct intel_batchbuffer *batch,
                    sampler_filter_t filter,
                   sampler_extend_t extend)
 {
-       struct gen6_sampler_state *ss;
+       struct gen6_sampler_state ss;
 
-       ss = intel_batch_state_alloc(batch, sizeof(*ss), 32);
-       if (ss == NULL)
-               return -1;
+       memset(&ss, 0, sizeof(ss));
 
-       ss->ss0.lod_preclamp = 1;       /* GL mode */
+       ss.ss0.lod_preclamp = 1;        /* GL mode */
 
        /* We use the legacy mode to get the semantics specified by
         * the Render extension. */
-       ss->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
+       ss.ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
 
        switch (filter) {
        default:
        case SAMPLER_FILTER_NEAREST:
-               ss->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
-               ss->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
+               ss.ss0.min_filter = GEN6_MAPFILTER_NEAREST;
+               ss.ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
                break;
        case SAMPLER_FILTER_BILINEAR:
-               ss->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
-               ss->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
+               ss.ss0.min_filter = GEN6_MAPFILTER_LINEAR;
+               ss.ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
                break;
        }
 
        switch (extend) {
        default:
        case SAMPLER_EXTEND_NONE:
-               ss->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-               ss->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-               ss->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+               ss.ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+               ss.ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+               ss.ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
                break;
        case SAMPLER_EXTEND_REPEAT:
-               ss->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
-               ss->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
-               ss->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
+               ss.ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
+               ss.ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
+               ss.ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
                break;
        case SAMPLER_EXTEND_PAD:
-               ss->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
-               ss->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
-               ss->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+               ss.ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+               ss.ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+               ss.ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
                break;
        case SAMPLER_EXTEND_REFLECT:
-               ss->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
-               ss->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
-               ss->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
+               ss.ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
+               ss.ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
+               ss.ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
                break;
        }
 
-       return intel_batch_offset(batch, ss);
+       return OUT_STATE_STRUCT(ss, 32);
 }
 
 static uint32_t
 gen6_create_vertex_buffer(struct intel_batchbuffer *batch)
 {
-       uint16_t *v;
-
-       v = intel_batch_state_alloc(batch, 2 * sizeof(uint16_t), 8);
-       if (v == NULL)
-               return -1;
+       uint16_t v[2];
 
        v[0] = 0;
        v[1] = 0;
 
-       return intel_batch_offset(batch, v);
+       return intel_batch_state_copy(batch, v, sizeof(v), 8, "vertex buffer");
 }
 
 static void gen6_emit_vertex_buffer(struct intel_batchbuffer *batch)
@@ -447,17 +429,15 @@ static void gen6_emit_vertex_buffer(struct 
intel_batchbuffer *batch)
                  0 << VB0_BUFFER_INDEX_SHIFT |
                  VB0_NULL_VERTEX_BUFFER |
                  0 << VB0_BUFFER_PITCH_SHIFT);
-       OUT_RELOC(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
-       OUT_RELOC(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
+       OUT_RELOC_STATE(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
+       OUT_RELOC_STATE(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
        OUT_BATCH(0);
 }
 
-int gen6_setup_null_render_state(struct intel_batchbuffer *batch)
+void gen6_setup_null_render_state(struct intel_batchbuffer *batch)
 {
        uint32_t wm_state, wm_kernel, wm_table;
-       uint32_t cc_vp, cc_blend, offset;
-       uint32_t batch_end;
-       int ret;
+       uint32_t cc_vp, cc_blend;
 
        wm_table  = gen6_bind_surfaces(batch);
        wm_kernel = gen6_create_kernel(batch);
@@ -492,10 +472,4 @@ int gen6_setup_null_render_state(struct intel_batchbuffer 
*batch)
        gen6_emit_vertex_buffer(batch);
 
        OUT_BATCH(MI_BATCH_BUFFER_END);
-
-       ret = intel_batch_error(batch);
-       if (ret == 0)
-               ret = intel_batch_total_used(batch);
-
-       return ret;
 }
diff --git a/tools/null_state_gen/intel_renderstate_gen7.c 
b/tools/null_state_gen/intel_renderstate_gen7.c
index 8fe8a80..22cd268 100644
--- a/tools/null_state_gen/intel_renderstate_gen7.c
+++ b/tools/null_state_gen/intel_renderstate_gen7.c
@@ -25,6 +25,7 @@
 #include "intel_batchbuffer.h"
 #include <lib/gen7_render.h>
 #include <lib/intel_reg.h>
+#include <string.h>
 #include <stdio.h>
 
 static const uint32_t ps_kernel[][4] = {
@@ -41,22 +42,7 @@ static const uint32_t ps_kernel[][4] = {
 static uint32_t
 gen7_bind_buf_null(struct intel_batchbuffer *batch)
 {
-       uint32_t *ss;
-
-       ss = intel_batch_state_alloc(batch, 8 * sizeof(*ss), 32);
-       if (ss == NULL)
-               return -1;
-
-       ss[0] = 0;
-       ss[1] = 0;
-       ss[2] = 0;
-       ss[3] = 0;
-       ss[4] = 0;
-       ss[5] = 0;
-       ss[6] = 0;
-       ss[7] = 0;
-
-       return intel_batch_offset(batch, ss);
+       return intel_batch_state_alloc(batch, 32, 32, "bind buf null");
 }
 
 static void
@@ -99,26 +85,7 @@ gen7_create_vertex_buffer(struct intel_batchbuffer *batch)
 {
        uint16_t *v;
 
-       v = intel_batch_state_alloc(batch, 12*sizeof(*v), 8);
-       if (v == NULL)
-               return -1;
-
-       v[0] = 0;
-       v[1] = 0;
-       v[2] = 0;
-       v[3] = 0;
-
-       v[4] = 0;
-       v[5] = 0;
-       v[6] = 0;
-       v[7] = 0;
-
-       v[8] = 0;
-       v[9] = 0;
-       v[10] = 0;
-       v[11] = 0;
-
-       return intel_batch_offset(batch, v);
+       return intel_batch_state_alloc(batch, 12*sizeof(*v), 8, "vertex 
buffer");
 }
 
 static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch)
@@ -134,7 +101,7 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch)
                  GEN7_VB0_NULL_VERTEX_BUFFER |
                  4*2 << GEN7_VB0_BUFFER_PITCH_SHIFT);
 
-       OUT_RELOC(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
+       OUT_RELOC_STATE(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
        OUT_BATCH(~0);
        OUT_BATCH(0);
 }
@@ -142,23 +109,21 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch)
 static uint32_t
 gen7_bind_surfaces(struct intel_batchbuffer *batch)
 {
-       uint32_t *binding_table;
+       unsigned offset;
 
-       binding_table = intel_batch_state_alloc(batch, 8, 32);
-       if (binding_table == NULL)
-               return -1;
+       offset = intel_batch_state_alloc(batch, 8, 32, "bind surfaces");
 
-       binding_table[0] = gen7_bind_buf_null(batch);
-       binding_table[1] = gen7_bind_buf_null(batch);
+       bb_area_emit_offset(batch->state, offset, gen7_bind_buf_null(batch), STATE_OFFSET, "bind 1");
+       bb_area_emit_offset(batch->state, offset + 4, gen7_bind_buf_null(batch), STATE_OFFSET, "bind 2");
 
-       return intel_batch_offset(batch, binding_table);
+       return offset;
 }
 
 static void
 gen7_emit_binding_table(struct intel_batchbuffer *batch)
 {
        OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
-       OUT_BATCH(gen7_bind_surfaces(batch));
+       OUT_BATCH_STATE_OFFSET(gen7_bind_surfaces(batch));
 }
 
 static void
@@ -174,19 +139,16 @@ gen7_emit_drawing_rectangle(struct intel_batchbuffer *batch)
 static uint32_t
 gen7_create_blend_state(struct intel_batchbuffer *batch)
 {
-       struct gen7_blend_state *blend;
-
-       blend = intel_batch_state_alloc(batch, sizeof(*blend), 64);
-       if (blend == NULL)
-               return -1;
+       struct gen7_blend_state blend;
+       memset(&blend, 0, sizeof(blend));
 
-       blend->blend0.dest_blend_factor = GEN7_BLENDFACTOR_ZERO;
-       blend->blend0.source_blend_factor = GEN7_BLENDFACTOR_ONE;
-       blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
-       blend->blend1.post_blend_clamp_enable = 1;
-       blend->blend1.pre_blend_clamp_enable = 1;
+       blend.blend0.dest_blend_factor = GEN7_BLENDFACTOR_ZERO;
+       blend.blend0.source_blend_factor = GEN7_BLENDFACTOR_ONE;
+       blend.blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
+       blend.blend1.post_blend_clamp_enable = 1;
+       blend.blend1.pre_blend_clamp_enable = 1;
 
-       return intel_batch_offset(batch, blend);
+       return OUT_STATE_STRUCT(blend, 64);
 }
 
 static void
@@ -208,54 +170,48 @@ gen7_emit_state_base_address(struct intel_batchbuffer *batch)
 static uint32_t
 gen7_create_cc_viewport(struct intel_batchbuffer *batch)
 {
-       struct gen7_cc_viewport *vp;
+       struct gen7_cc_viewport vp;
+       memset(&vp, 0, sizeof(vp));
 
-       vp = intel_batch_state_alloc(batch, sizeof(*vp), 32);
-       if (vp == NULL)
-               return -1;
+       vp.min_depth = -1.e35;
+       vp.max_depth = 1.e35;
 
-       vp->min_depth = -1.e35;
-       vp->max_depth = 1.e35;
-
-       return intel_batch_offset(batch, vp);
+       return OUT_STATE_STRUCT(vp, 32);
 }
 
 static void
 gen7_emit_cc(struct intel_batchbuffer *batch)
 {
        OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
-       OUT_BATCH(gen7_create_blend_state(batch));
+       OUT_BATCH_STATE_OFFSET(gen7_create_blend_state(batch));
 
        OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
-       OUT_BATCH(gen7_create_cc_viewport(batch));
+       OUT_BATCH_STATE_OFFSET(gen7_create_cc_viewport(batch));
 }
 
 static uint32_t
 gen7_create_sampler(struct intel_batchbuffer *batch)
 {
-       struct gen7_sampler_state *ss;
-
-       ss = intel_batch_state_alloc(batch, sizeof(*ss), 32);
-       if (ss == NULL)
-               return -1;
+       struct gen7_sampler_state ss;
+       memset(&ss, 0, sizeof(ss));
 
-       ss->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
-       ss->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
+       ss.ss0.min_filter = GEN7_MAPFILTER_NEAREST;
+       ss.ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
 
-       ss->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
-       ss->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
-       ss->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
+       ss.ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
+       ss.ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
+       ss.ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
 
-       ss->ss3.non_normalized_coord = 1;
+       ss.ss3.non_normalized_coord = 1;
 
-       return intel_batch_offset(batch, ss);
+       return OUT_STATE_STRUCT(ss, 32);
 }
 
 static void
 gen7_emit_sampler(struct intel_batchbuffer *batch)
 {
        OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
-       OUT_BATCH(gen7_create_sampler(batch));
+       OUT_BATCH_STATE_OFFSET(gen7_create_sampler(batch));
 }
 
 static void
@@ -406,8 +362,8 @@ gen7_emit_ps(struct intel_batchbuffer *batch)
                threads = 40 << IVB_PS_MAX_THREADS_SHIFT;
 
        OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
-       OUT_BATCH(intel_batch_state_copy(batch, ps_kernel,
-                                        sizeof(ps_kernel), 64));
+       OUT_BATCH_STATE_OFFSET(intel_batch_state_copy(batch, ps_kernel,
+                                                     sizeof(ps_kernel), 64, "ps kernel"));
        OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
                  2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
        OUT_BATCH(0); /* scratch address */
@@ -458,7 +414,7 @@ gen7_emit_null_depth_buffer(struct intel_batchbuffer *batch)
        OUT_BATCH(0);
 }
 
-int gen7_setup_null_render_state(struct intel_batchbuffer *batch)
+void gen7_setup_null_render_state(struct intel_batchbuffer *batch)
 {
        int ret;
 
@@ -496,10 +452,4 @@ int gen7_setup_null_render_state(struct intel_batchbuffer *batch)
        OUT_BATCH(0);   /* index buffer offset, ignored */
 
        OUT_BATCH(MI_BATCH_BUFFER_END);
-
-       ret = intel_batch_error(batch);
-       if (ret == 0)
-               ret = intel_batch_total_used(batch);
-
-       return ret;
 }
diff --git a/tools/null_state_gen/intel_renderstate_gen8.c b/tools/null_state_gen/intel_renderstate_gen8.c
index 807c2c8..4812b51 100644
--- a/tools/null_state_gen/intel_renderstate_gen8.c
+++ b/tools/null_state_gen/intel_renderstate_gen8.c
@@ -39,32 +39,21 @@ static const uint32_t ps_kernel[][4] = {
 static uint32_t
 gen8_bind_buf_null(struct intel_batchbuffer *batch)
 {
-       struct gen8_surface_state *ss;
+       struct gen8_surface_state ss;
+       memset(&ss, 0, sizeof(ss));
 
-       ss = intel_batch_state_alloc(batch, sizeof(*ss), 64);
-       if (ss == NULL)
-               return -1;
-
-       memset(ss, 0, sizeof(*ss));
-
-       return intel_batch_offset(batch, ss);
+       return OUT_STATE_STRUCT(ss, 64);
 }
 
 static uint32_t
 gen8_bind_surfaces(struct intel_batchbuffer *batch)
 {
-       uint32_t *binding_table, offset;
-
-       binding_table = intel_batch_state_alloc(batch, 8, 32);
-       if (binding_table == NULL)
-               return -1;
+       unsigned offset;
 
-       offset = intel_batch_offset(batch, binding_table);
+       offset = intel_batch_state_alloc(batch, 8, 32, "bind surfaces");
 
-       binding_table[0] =
-               gen8_bind_buf_null(batch);
-       binding_table[1] =
-               gen8_bind_buf_null(batch);
+       bb_area_emit_offset(batch->state, offset, gen8_bind_buf_null(batch), STATE_OFFSET, "bind 1");
+       bb_area_emit_offset(batch->state, offset + 4, gen8_bind_buf_null(batch), STATE_OFFSET, "bind 2");
 
        return offset;
 }
@@ -72,26 +61,20 @@ gen8_bind_surfaces(struct intel_batchbuffer *batch)
 /* Mostly copy+paste from gen6, except wrap modes moved */
 static uint32_t
 gen8_create_sampler(struct intel_batchbuffer *batch) {
-       struct gen8_sampler_state *ss;
-       uint32_t offset;
-
-       ss = intel_batch_state_alloc(batch, sizeof(*ss), 64);
-       if (ss == NULL)
-               return -1;
-
-       offset = intel_batch_offset(batch, ss);
+       struct gen8_sampler_state ss;
+       memset(&ss, 0, sizeof(ss));
 
-       ss->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
-       ss->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
-       ss->ss3.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
-       ss->ss3.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
-       ss->ss3.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+       ss.ss0.min_filter = GEN6_MAPFILTER_NEAREST;
+       ss.ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
+       ss.ss3.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+       ss.ss3.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+       ss.ss3.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
 
        /* I've experimented with non-normalized coordinates and using the LD
         * sampler fetch, but couldn't make it work. */
-       ss->ss3.non_normalized_coord = 0;
+       ss.ss3.non_normalized_coord = 0;
 
-       return offset;
+       return OUT_STATE_STRUCT(ss, 64);
 }
 
 static uint32_t
@@ -99,7 +82,7 @@ gen8_fill_ps(struct intel_batchbuffer *batch,
             const uint32_t kernel[][4],
             size_t size)
 {
-       return intel_batch_state_copy(batch, kernel, size, 64);
+       return intel_batch_state_copy(batch, kernel, size, 64, "ps kernel");
 }
 
 /**
@@ -115,13 +98,9 @@ gen8_fill_ps(struct intel_batchbuffer *batch,
 static uint32_t
 gen7_fill_vertex_buffer_data(struct intel_batchbuffer *batch)
 {
-       uint16_t *start;
-
-       start = intel_batch_state_alloc(batch, 2 * sizeof(*start), 8);
-       start[0] = 0;
-       start[1] = 0;
+       uint16_t *v;
 
-       return intel_batch_offset(batch, start);
+       return intel_batch_state_alloc(batch, 2 * sizeof(*v), 8, "vertex buffer");
 }
 
 /**
@@ -194,7 +173,7 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch,
                  GEN7_VB0_BUFFER_ADDR_MOD_EN | /* Address Modify Enable */
                  VB0_NULL_VERTEX_BUFFER |
                  0 << VB0_BUFFER_PITCH_SHIFT);
-       OUT_RELOC(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
+       OUT_RELOC_STATE(batch, I915_GEM_DOMAIN_VERTEX, 0, offset);
        OUT_BATCH(0);
        OUT_BATCH(0);
 }
@@ -202,94 +181,68 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch,
 static uint32_t
 gen6_create_cc_state(struct intel_batchbuffer *batch)
 {
-       struct gen6_color_calc_state *cc_state;
-       uint32_t offset;
-
-       cc_state = intel_batch_state_alloc(batch, sizeof(*cc_state), 64);
-       if (cc_state == NULL)
-               return -1;
+       struct gen6_color_calc_state cc_state;
+       memset(&cc_state, 0, sizeof(cc_state));
 
-       offset = intel_batch_offset(batch, cc_state);
-
-       return offset;
+       return OUT_STATE_STRUCT(cc_state, 64);
 }
 
 static uint32_t
 gen8_create_blend_state(struct intel_batchbuffer *batch)
 {
-       struct gen8_blend_state *blend;
+       struct gen8_blend_state blend;
        int i;
-       uint32_t offset;
 
-       blend = intel_batch_state_alloc(batch, sizeof(*blend), 64);
-       if (blend == NULL)
-               return -1;
-
-       offset = intel_batch_offset(batch, blend);
+       memset(&blend, 0, sizeof(blend));
 
        for (i = 0; i < 16; i++) {
-               blend->bs[i].dest_blend_factor = GEN6_BLENDFACTOR_ZERO;
-               blend->bs[i].source_blend_factor = GEN6_BLENDFACTOR_ONE;
-               blend->bs[i].color_blend_func = GEN6_BLENDFUNCTION_ADD;
-               blend->bs[i].pre_blend_color_clamp = 1;
-               blend->bs[i].color_buffer_blend = 0;
+               blend.bs[i].dest_blend_factor = GEN6_BLENDFACTOR_ZERO;
+               blend.bs[i].source_blend_factor = GEN6_BLENDFACTOR_ONE;
+               blend.bs[i].color_blend_func = GEN6_BLENDFUNCTION_ADD;
+               blend.bs[i].pre_blend_color_clamp = 1;
+               blend.bs[i].color_buffer_blend = 0;
        }
 
-       return offset;
+       return OUT_STATE_STRUCT(blend, 64);
 }
 
 static uint32_t
 gen6_create_cc_viewport(struct intel_batchbuffer *batch)
 {
-       struct gen6_cc_viewport *vp;
-       uint32_t offset;
-
-       vp = intel_batch_state_alloc(batch, sizeof(*vp), 32);
-       if (vp == NULL)
-               return -1;
+       struct gen6_cc_viewport vp;
 
-       offset = intel_batch_offset(batch, vp);
+       memset(&vp, 0, sizeof(vp));
 
        /* XXX I don't understand this */
-       vp->min_depth = -1.e35;
-       vp->max_depth = 1.e35;
+       vp.min_depth = -1.e35;
+       vp.max_depth = 1.e35;
 
-       return offset;
+       return OUT_STATE_STRUCT(vp, 32);
 }
 
 static uint32_t
 gen7_create_sf_clip_viewport(struct intel_batchbuffer *batch) {
        /* XXX these are likely not needed */
-       struct gen7_sf_clip_viewport *scv_state;
-       uint32_t offset;
-
-       scv_state = intel_batch_state_alloc(batch, sizeof(*scv_state), 64);
-       if (scv_state == NULL)
-               return -1;
+       struct gen7_sf_clip_viewport scv_state;
 
-       offset = intel_batch_offset(batch, scv_state);
+       memset(&scv_state, 0, sizeof(scv_state));
 
-       scv_state->guardband.xmin = 0;
-       scv_state->guardband.xmax = 1.0f;
-       scv_state->guardband.ymin = 0;
-       scv_state->guardband.ymax = 1.0f;
+       scv_state.guardband.xmin = 0;
+       scv_state.guardband.xmax = 1.0f;
+       scv_state.guardband.ymin = 0;
+       scv_state.guardband.ymax = 1.0f;
 
-       return offset;
+       return OUT_STATE_STRUCT(scv_state, 64);
 }
 
 static uint32_t
 gen6_create_scissor_rect(struct intel_batchbuffer *batch)
 {
-       struct gen6_scissor_rect *scissor;
-       uint32_t offset;
-
-       scissor = intel_batch_state_alloc(batch, sizeof(*scissor), 64);
-       if (scissor == NULL)
-               return -1;
+       struct gen6_scissor_rect scissor;
 
-       offset = intel_batch_offset(batch, scissor);
+       memset(&scissor, 0, sizeof(scissor));
 
-       return offset;
+       return OUT_STATE_STRUCT(scissor, 64);
 }
 
 static void
@@ -371,10 +324,10 @@ gen7_emit_urb(struct intel_batchbuffer *batch) {
 static void
 gen8_emit_cc(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS);
-       OUT_BATCH(cc.blend_state | 1);
+       OUT_BATCH_STATE_OFFSET(cc.blend_state | 1);
 
        OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS);
-       OUT_BATCH(cc.cc_state | 1);
+       OUT_BATCH_STATE_OFFSET(cc.cc_state | 1);
 }
 
 static void
@@ -596,7 +549,7 @@ gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) {
        OUT_BATCH(0);
 
        OUT_BATCH(GEN7_3DSTATE_PS | (12-2));
-       OUT_BATCH(kernel);
+       OUT_BATCH_STATE_OFFSET(kernel);
        OUT_BATCH(0); /* kernel hi */
        OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
                  2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
@@ -664,7 +617,7 @@ static void gen8_emit_vf_topology(struct intel_batchbuffer *batch)
 }
 
 /* Vertex elements MUST be defined before this according to spec */
-static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset)
+static void gen8_emit_primitive(struct intel_batchbuffer *batch)
 {
        OUT_BATCH(GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
        OUT_BATCH(0);
@@ -679,7 +632,7 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
        OUT_BATCH(0);   /* index buffer offset, ignored */
 }
 
-int gen8_setup_null_render_state(struct intel_batchbuffer *batch)
+void gen8_setup_null_render_state(struct intel_batchbuffer *batch)
 {
        uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
        uint32_t scissor_state;
@@ -709,9 +662,9 @@ int gen8_setup_null_render_state(struct intel_batchbuffer *batch)
        gen8_emit_state_base_address(batch);
 
        OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
-       OUT_BATCH(viewport.cc_state);
+       OUT_BATCH_STATE_OFFSET(viewport.cc_state);
        OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
-       OUT_BATCH(viewport.sf_clip_state);
+       OUT_BATCH_STATE_OFFSET(viewport.sf_clip_state);
 
        gen7_emit_urb(batch);
 
@@ -732,15 +685,15 @@ int gen8_setup_null_render_state(struct intel_batchbuffer *batch)
        gen8_emit_sf(batch);
 
        OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
-       OUT_BATCH(ps_binding_table);
+       OUT_BATCH_STATE_OFFSET(ps_binding_table);
 
        OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS);
-       OUT_BATCH(ps_sampler_state);
+       OUT_BATCH_STATE_OFFSET(ps_sampler_state);
 
        gen8_emit_ps(batch, ps_kernel_off);
 
        OUT_BATCH(GEN6_3DSTATE_SCISSOR_STATE_POINTERS);
-       OUT_BATCH(scissor_state);
+       OUT_BATCH_STATE_OFFSET(scissor_state);
 
        gen8_emit_depth(batch);
 
@@ -752,13 +705,7 @@ int gen8_setup_null_render_state(struct intel_batchbuffer *batch)
        gen6_emit_vertex_elements(batch);
 
        gen8_emit_vf_topology(batch);
-       gen8_emit_primitive(batch, vertex_buffer);
+       gen8_emit_primitive(batch);
 
        OUT_BATCH(MI_BATCH_BUFFER_END);
-
-       ret = intel_batch_error(batch);
-       if (ret == 0)
-               ret = intel_batch_total_used(batch);
-
-       return ret;
 }
-- 
1.7.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to