On Wed, Dec 2, 2015 at 4:15 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > From: Chris Forbes <chr...@ijw.co.nz> > > Signed-off-by: Chris Forbes <chr...@ijw.co.nz>
The commit title should be some imperative statement. Maybe just add "Add" to the beginning. > --- > src/mesa/drivers/dri/i965/brw_context.h | 17 +++- > src/mesa/drivers/dri/i965/gen7_blorp.cpp | 8 ++ > src/mesa/drivers/dri/i965/gen7_urb.c | 162 > +++++++++++++++++++++++++------ > 3 files changed, 157 insertions(+), 30 deletions(-) > > The URB code could use some janitorial work - using arrays based on > MESA_SHADER_* instead of replicating a bunch of code would be much nicer. > > I just don't feel like doing it today. > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index e22f21d..88f6713 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -995,6 +995,8 @@ struct brw_context > struct { > GLuint vsize; /* vertex size plus header in urb registers */ > GLuint gsize; /* GS output size in urb registers */ > + GLuint hsize; /* Tessellation control output size in urb > registers */ > + GLuint dsize; /* Tessellation evaluation output size in > urb registers */ > GLuint csize; /* constant buffer size in urb registers */ > GLuint sfsize; /* setup data size in urb registers */ > > @@ -1007,12 +1009,16 @@ struct brw_context > GLuint max_gs_entries; /* Maximum number of GS entries */ > > GLuint nr_vs_entries; > + GLuint nr_hs_entries; > + GLuint nr_ds_entries; > GLuint nr_gs_entries; > GLuint nr_clip_entries; > GLuint nr_sf_entries; > GLuint nr_cs_entries; > > GLuint vs_start; > + GLuint hs_start; > + GLuint ds_start; > GLuint gs_start; > GLuint clip_start; > GLuint sf_start; > @@ -1023,6 +1029,7 @@ struct brw_context > * URB space for the GS. > */ > bool gs_present; > + bool ts_present; > } urb; > > > @@ -1628,12 +1635,18 @@ void gen8_emit_3dstate_sample_pattern(struct > brw_context *brw); > /* gen7_urb.c */ > void > gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, > + unsigned hs_size, unsigned ds_size, > unsigned gs_size, unsigned fs_size); > > void > gen7_emit_urb_state(struct brw_context *brw, > - unsigned nr_vs_entries, unsigned vs_size, > - unsigned vs_start, unsigned nr_gs_entries, > + unsigned nr_vs_entries, > + unsigned vs_size, unsigned vs_start, > + unsigned nr_hs_entries, > + unsigned hs_size, unsigned hs_start, > + unsigned nr_ds_entries, > + unsigned ds_size, unsigned ds_start, > + unsigned nr_gs_entries, > unsigned gs_size, unsigned gs_start); > > > diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp > b/src/mesa/drivers/dri/i965/gen7_blorp.cpp > index e87b9d1..89b73ca 100644 > --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp > +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp > @@ -50,6 +50,8 @@ gen7_blorp_emit_urb_config(struct brw_context *brw) > unsigned urb_size = (brw->is_haswell && brw->gt == 3) ? 32 : 16; > gen7_emit_push_constant_state(brw, > urb_size / 2 /* vs_size */, > + 0 /* hs_size */, > + 0 /* ds_size */, > 0 /* gs_size */, > urb_size / 2 /* fs_size */); > > @@ -60,6 +62,12 @@ gen7_blorp_emit_urb_config(struct brw_context *brw) > 32 /* num_vs_entries */, > 2 /* vs_size */, > 2 /* vs_start */, > + 0 /* num_hs_entries */, > + 1 /* hs_size */, > + 2 /* hs_start */, > + 0 /* num_ds_entries */, > + 1 /* ds_size */, > + 2 /* ds_start */, > 0 /* num_gs_entries */, > 1 /* gs_size */, > 2 /* gs_start */); > diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c > b/src/mesa/drivers/dri/i965/gen7_urb.c > index 161de77..9a09a19 100644 > --- a/src/mesa/drivers/dri/i965/gen7_urb.c > +++ b/src/mesa/drivers/dri/i965/gen7_urb.c > @@ -34,7 +34,7 @@ > * __________-__________ _________________-_________________ > * / \ / \ > * +-------------------------------------------------------------+ > - * | VS/FS/GS Push | VS/GS URB | > + * | VS/HS/DS/GS/FS Push | VS/HS/DS/GS URB | > * | Constants | Entries | > * +-------------------------------------------------------------+ > * > @@ -65,22 +65,29 @@ gen7_allocate_push_constants(struct brw_context *brw) > (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 2 : 1; > > /* BRW_NEW_GEOMETRY_PROGRAM */ > + int stages = 2; > + /* BRW_NEW_TESS_CTRL_PROGRAM, BRW_NEW_TESS_EVAL_PROGRAM */ > bool gs_present = brw->geometry_program; > + if (gs_present) > + stages += 1; > + bool ts_present = brw->tess_eval_program; > + if (ts_present) { > + assert(brw->tess_ctrl_program); > + stages += 2; > + } > > - unsigned vs_size, gs_size; > - if (gs_present) { > - vs_size = avail_size / 3; > - avail_size -= vs_size; > - gs_size = avail_size / 2; > - avail_size -= gs_size; > - } else { > - vs_size = avail_size / 2; > - avail_size -= vs_size; > - gs_size = 0; > + unsigned vs_size, hs_size = 0, ds_size = 0, gs_size = 0; > + avail_size -= (vs_size = avail_size / stages--); > + if (ts_present) { > + avail_size -= (hs_size = avail_size / stages--); > + avail_size -= (ds_size = avail_size / stages--); Whew. > } > + if (gs_present) > + avail_size -= (gs_size = avail_size / stages--); > unsigned fs_size = avail_size; > > gen7_emit_push_constant_state(brw, multiplier * vs_size, > + multiplier * hs_size, multiplier * ds_size, > multiplier * gs_size, multiplier * fs_size); > > /* From p115 of the Ivy Bridge PRM (3.2.1.4 > 3DSTATE_PUSH_CONSTANT_ALLOC_VS): > @@ -99,15 +106,24 @@ gen7_allocate_push_constants(struct brw_context *brw) > > void > gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, > + unsigned hs_size, unsigned ds_size, > unsigned gs_size, unsigned fs_size) > { > unsigned offset = 0; > > - BEGIN_BATCH(6); > + BEGIN_BATCH(10); > OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2)); > OUT_BATCH(vs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); > offset += vs_size; > > + OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_HS << 16 | (2 - 2)); > + OUT_BATCH(hs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); > + offset += hs_size; > + > + OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_DS << 16 | (2 - 2)); > + OUT_BATCH(ds_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); > + offset += ds_size; > + > OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_GS << 16 | (2 - 2)); > OUT_BATCH(gs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); > offset += gs_size; > @@ -130,7 +146,10 @@ gen7_emit_push_constant_state(struct brw_context *brw, > unsigned vs_size, > const struct brw_tracked_state gen7_push_constant_space = { > .dirty = { > .mesa = 0, > - .brw = BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM, > + .brw = BRW_NEW_CONTEXT | > + BRW_NEW_GEOMETRY_PROGRAM | > + BRW_NEW_TESS_CTRL_PROGRAM | > + BRW_NEW_TESS_EVAL_PROGRAM, > }, > .emit = gen7_allocate_push_constants, > }; > @@ -138,6 +157,7 @@ const struct brw_tracked_state gen7_push_constant_space = > { > static void > gen7_upload_urb(struct brw_context *brw) > { > + const struct brw_device_info *devinfo = brw->intelScreen->devinfo; > const int push_size_kB = > (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 32 : 16; > > @@ -149,27 +169,46 @@ gen7_upload_urb(struct brw_context *brw) > unsigned gs_size = gs_present ? brw->gs.prog_data->base.urb_entry_size : > 1; > unsigned gs_entry_size_bytes = gs_size * 64; > > + /* BRW_NEW_TESS_CTRL_PROGRAM, BRW_NEW_TCS_PROG_DATA */ > + /* BRW_NEW_TESS_EVAL_PROGRAM, BRW_NEW_TES_PROG_DATA */ > + const bool ts_present = brw->tess_eval_program; > + if (ts_present) > + assert(brw->tess_ctrl_program); > + unsigned hs_size = ts_present ? brw->tcs.prog_data->base.urb_entry_size : > 1; > + unsigned hs_entry_size_bytes = hs_size * 64; > + unsigned ds_size = ts_present ? brw->tes.prog_data->base.urb_entry_size : > 1; > + unsigned ds_entry_size_bytes = ds_size * 64; > + > /* If we're just switching between programs with the same URB > requirements, > * skip the rest of the logic. > */ > if (!(brw->ctx.NewDriverState & BRW_NEW_CONTEXT) && > brw->urb.vsize == vs_size && > brw->urb.gs_present == gs_present && > - brw->urb.gsize == gs_size) { > + brw->urb.gsize == gs_size && > + brw->urb.ts_present == ts_present && > + brw->urb.hsize == hs_size && > + brw->urb.dsize == ds_size) { > return; > } > brw->urb.vsize = vs_size; > brw->urb.gs_present = gs_present; > brw->urb.gsize = gs_size; > + brw->urb.ts_present = ts_present; > + brw->urb.hsize = hs_size; > + brw->urb.dsize = ds_size; > + I suspect you didn't mean to add this newline. > > /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): > * > * VS Number of URB Entries must be divisible by 8 if the VS URB Entry > * Allocation Size is less than 9 512-bit URB entries. > * > - * Similar text exists for GS. > + * Similar text exists for HS, DS and GS. > */ > unsigned vs_granularity = (vs_size < 9) ? 8 : 1; > + unsigned hs_granularity = (hs_size < 9) ? 8 : 1; > + unsigned ds_granularity = (ds_size < 9) ? 8 : 1; > unsigned gs_granularity = (gs_size < 9) ? 8 : 1; > > /* URB allocations must be done in 8k chunks. */ > @@ -190,8 +229,10 @@ gen7_upload_urb(struct brw_context *brw) > */ > > /* VS has a lower limit on the number of URB entries */ > + unsigned vs_min_entries = ts_present ? 192 : brw->urb.min_vs_entries; The 3DSTATE_URB_VS documentation says this is BDW-only. > + > unsigned vs_chunks = > - ALIGN(brw->urb.min_vs_entries * vs_entry_size_bytes, chunk_size_bytes) > / > + ALIGN(vs_min_entries * vs_entry_size_bytes, chunk_size_bytes) / > chunk_size_bytes; > unsigned vs_wants = > ALIGN(brw->urb.max_vs_entries * vs_entry_size_bytes, > @@ -215,14 +256,36 @@ gen7_upload_urb(struct brw_context *brw) > chunk_size_bytes) / chunk_size_bytes - gs_chunks; > } > > + unsigned hs_chunks = 0; > + unsigned hs_wants = 0; > + unsigned ds_chunks = 0; > + unsigned ds_wants = 0; > + > + if (ts_present) { > + hs_chunks = > + ALIGN(hs_granularity * hs_entry_size_bytes, chunk_size_bytes) / > + chunk_size_bytes; > + hs_wants = > + ALIGN(brw->urb.max_hs_entries * hs_entry_size_bytes, > + chunk_size_bytes) / chunk_size_bytes - hs_chunks; > + > + ds_chunks = > + ALIGN(devinfo->urb.min_ds_entries * ds_entry_size_bytes, > chunk_size_bytes) / > + chunk_size_bytes; > + ds_wants = > + ALIGN(brw->urb.max_ds_entries * ds_entry_size_bytes, > + chunk_size_bytes) / chunk_size_bytes - ds_chunks; Align the overflowing expression in these ALIGN()s > + } > + > /* There should always be enough URB space to satisfy the minimum > * requirements of each stage. > */ > - unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; > + unsigned total_needs = push_constant_chunks + > + vs_chunks + hs_chunks + ds_chunks + gs_chunks; > assert(total_needs <= urb_chunks); > > /* Mete out remaining space (if any) in proportion to "wants". */ > - unsigned total_wants = vs_wants + gs_wants; > + unsigned total_wants = vs_wants + hs_wants + ds_wants + gs_wants; > unsigned remaining_space = urb_chunks - total_needs; > if (remaining_space > total_wants) > remaining_space = total_wants; > @@ -231,61 +294,99 @@ gen7_upload_urb(struct brw_context *brw) > roundf(vs_wants * (((float) remaining_space) / total_wants)); > vs_chunks += vs_additional; > remaining_space -= vs_additional; > + total_wants -= vs_wants; > + > + unsigned hs_additional = (unsigned) > + round(hs_wants * (((double) remaining_space) / total_wants)); s/(unsigned) round/lround/ > + hs_chunks += hs_additional; > + remaining_space -= hs_additional; > + total_wants -= hs_wants; > + > + unsigned ds_additional = (unsigned) > + round(ds_wants * (((double) remaining_space) / total_wants)); s/(unsigned) round/lround/ _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev