On Monday, April 24, 2017 3:19:24 PM PDT Rafael Antognolli wrote: [snip] > diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c > b/src/mesa/drivers/dri/i965/gen7_gs_state.c > deleted file mode 100644 > index 1b5b782..0000000 > --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c > +++ /dev/null > @@ -1,168 +0,0 @@ > -/* > - * Copyright © 2013 Intel Corporation > - * > - * Permission is hereby granted, free of charge, to any person obtaining a > - * copy of this software and associated documentation files (the "Software"), > - * to deal in the Software without restriction, including without limitation > - * the rights to use, copy, modify, merge, publish, distribute, sublicense, > - * and/or sell copies of the Software, and to permit persons to whom the > - * Software is furnished to do so, subject to the following conditions: > - * > - * The above copyright notice and this permission notice (including the next > - * paragraph) shall be included in all copies or substantial portions of the > - * Software. > - * > - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > - * DEALINGS IN THE SOFTWARE. > - */ > - > -#include "brw_context.h" > -#include "brw_state.h" > -#include "brw_defines.h" > -#include "intel_batchbuffer.h" > - > -static void > -upload_gs_state(struct brw_context *brw) > -{ > - const struct gen_device_info *devinfo = &brw->screen->devinfo; > - const struct brw_stage_state *stage_state = &brw->gs.base; > - const int max_threads_shift = brw->is_haswell ? > - HSW_GS_MAX_THREADS_SHIFT : GEN6_GS_MAX_THREADS_SHIFT; > - /* BRW_NEW_GEOMETRY_PROGRAM */ > - bool active = brw->geometry_program; > - /* BRW_NEW_GS_PROG_DATA */ > - const struct brw_stage_prog_data *prog_data = stage_state->prog_data; > - const struct brw_vue_prog_data *vue_prog_data = > - brw_vue_prog_data(stage_state->prog_data); > - const struct brw_gs_prog_data *gs_prog_data = > - brw_gs_prog_data(stage_state->prog_data); > - > - /** > - * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > > - * Geometry > Geometry Shader > State: > - * > - * "Note: Because of corruption in IVB:GT2, software needs to flush > the > - * whole fixed function pipeline when the GS enable changes value in > - * the 3DSTATE_GS." > - * > - * The hardware architects have clarified that in this context "flush the > - * whole fixed function pipeline" means to emit a PIPE_CONTROL with the > "CS > - * Stall" bit set. > - */ > - if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active) > - gen7_emit_cs_stall_flush(brw);
You're missing this flush and comment. Please add it back. > - > - if (active) { > - BEGIN_BATCH(7); > - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); > - OUT_BATCH(stage_state->prog_offset); > - OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) << > - GEN6_GS_SAMPLER_COUNT_SHIFT) | > - ((prog_data->binding_table.size_bytes / 4) << > - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); > - > - if (prog_data->total_scratch) { > - OUT_RELOC(stage_state->scratch_bo, > - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, > - ffs(stage_state->per_thread_scratch) - 11); > - } else { > - OUT_BATCH(0); > - } > - > - uint32_t dw4 = > - ((gs_prog_data->output_vertex_size_hwords * 2 - 1) << > - GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) | > - (gs_prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) | > - (vue_prog_data->urb_read_length << > - GEN6_GS_URB_READ_LENGTH_SHIFT) | > - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | > - (prog_data->dispatch_grf_start_reg << > - GEN6_GS_DISPATCH_START_GRF_SHIFT); > - > - /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes > between > - * Ivy Bridge and Haswell. > - * > - * On Ivy Bridge, setting this bit causes the vertices of a triangle > - * strip to be delivered to the geometry shader in an order that does > - * not strictly follow the OpenGL spec, but preserves triangle > - * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then > - * the geometry shader sees triangles: > - * > - * (1, 2, 3), (2, 4, 3), (3, 4, 5) > - * > - * (Clearing the bit is even worse, because it fails to preserve > - * orientation). > - * > - * Triangle strips with adjacency always ordered in a way that > preserves > - * triangle orientation but does not strictly follow the OpenGL spec, > - * regardless of the setting of this bit. > - * > - * On Haswell, both triangle strips and triangle strips with adjacency > - * are always ordered in a way that preserves triangle orientation. > - * Setting this bit causes the ordering to strictly follow the OpenGL > - * spec. > - * > - * So in either case we want to set the bit. Unfortunately on Ivy > - * Bridge this will get the order close to correct but not perfect. > - */ Please keep this comment - it's useful. > - uint32_t dw5 = > - ((devinfo->max_gs_threads - 1) << max_threads_shift) | > - (gs_prog_data->control_data_header_size_hwords << > - GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | > - ((gs_prog_data->invocations - 1) << > - GEN7_GS_INSTANCE_CONTROL_SHIFT) | > - SET_FIELD(vue_prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) | > - GEN6_GS_STATISTICS_ENABLE | > - (gs_prog_data->include_primitive_id ? > - GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | > - GEN7_GS_REORDER_TRAILING | > - GEN7_GS_ENABLE; > - uint32_t dw6 = 0; > - > - if (brw->is_haswell) { > - dw6 |= gs_prog_data->control_data_format << > - HSW_GS_CONTROL_DATA_FORMAT_SHIFT; > - } else { > - dw5 |= gs_prog_data->control_data_format << > - IVB_GS_CONTROL_DATA_FORMAT_SHIFT; > - } > - > - OUT_BATCH(dw4); > - OUT_BATCH(dw5); > - OUT_BATCH(dw6); > - ADVANCE_BATCH(); > - } else { > - BEGIN_BATCH(7); > - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); > - OUT_BATCH(0); /* prog_bo */ > - OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | > - (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); > - OUT_BATCH(0); /* scratch space base offset */ > - OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | > - (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | > - GEN7_GS_INCLUDE_VERTEX_HANDLES | > - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); > - OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | > - GEN6_GS_STATISTICS_ENABLE); > - OUT_BATCH(0); > - ADVANCE_BATCH(); > - } > - brw->gs.enabled = active; > -} > - > -const struct brw_tracked_state gen7_gs_state = { > - .dirty = { > - .mesa = _NEW_TRANSFORM, > - .brw = BRW_NEW_BATCH | > - BRW_NEW_BLORP | > - BRW_NEW_CONTEXT | > - BRW_NEW_GEOMETRY_PROGRAM | > - BRW_NEW_GS_PROG_DATA, > - }, > - .emit = upload_gs_state, > -}; [snip] > diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c > b/src/mesa/drivers/dri/i965/genX_state_upload.c > index d1609f6..d2a936b 100644 > --- a/src/mesa/drivers/dri/i965/genX_state_upload.c > +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c [snip] > +static void > +genX(upload_gs_state)(struct brw_context *brw) > +{ > + const struct gen_device_info *devinfo = &brw->screen->devinfo; > + const struct brw_stage_state *stage_state = &brw->gs.base; > + /* BRW_NEW_GEOMETRY_PROGRAM */ > + bool active = brw->geometry_program; > + > + /* BRW_NEW_GS_PROG_DATA */ > + struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data; > + const struct brw_vue_prog_data *vue_prog_data = > + brw_vue_prog_data(stage_prog_data); > +#if GEN_GEN >= 7 > + const struct brw_gs_prog_data *gs_prog_data = > + brw_gs_prog_data(stage_prog_data); > +#endif > + > + /* _NEW_TRANSFORM */ > +#if GEN_GEN >= 8 > + struct gl_context *ctx = &brw->ctx; > + const struct gl_transform_attrib *transform = &ctx->Transform; > +#endif We can delete this block. > + > +#if GEN_GEN < 7 > + brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) { > + if (active && stage_state->push_const_size != 0) { > + cgs.Buffer0Valid = true; > + cgs.PointertoGSConstantBuffer0 = stage_state->push_const_offset; > + cgs.GSConstantBuffer0ReadLength = stage_state->push_const_size - 1; > + } > + } > +#endif > + > + if (active) { > + brw_batch_emit(brw, GENX(3DSTATE_GS), gs) { > + INIT_THREAD_DISPATCH_FIELDS(gs, Vertex); > + > +#if GEN_GEN >= 7 > + gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - > 1; > + gs.OutputTopology = gs_prog_data->output_topology; > + gs.ControlDataHeaderSize = > + gs_prog_data->control_data_header_size_hwords; > + > + gs.InstanceControl = gs_prog_data->invocations - 1; > + gs.DispatchMode = vue_prog_data->dispatch_mode; > + > + gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; > + > + gs.ControlDataFormat = gs_prog_data->control_data_format; > +#endif > + > + gs.ReorderMode = TRAILING; > + gs.MaximumNumberofThreads = > + GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1) > + : (devinfo->max_gs_threads - 1); > + > +#if GEN_GEN < 7 > + gs.SOStatisticsEnable = true; > + gs.RenderingEnabled = 1; > + if (brw->geometry_program->info.has_transform_feedback_varyings) > + gs.SVBIPayloadEnable = true; > + > + /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it > + * was previously done for gen6. > + * > + * TODO: test with both disabled to see if the HW is behaving > + * as expected, like in gen7. > + */ > + gs.SingleProgramFlow = true; > + gs.VectorMaskEnable = true; > +#endif > + > +#if GEN_GEN >= 8 > + gs.ExpectedVertexCount = gs_prog_data->vertices_in; > + > + if (gs_prog_data->static_vertex_count != -1) { > + gs.StaticOutput = true; > + gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count; > + } > + gs.IncludeVertexHandles = vue_prog_data->include_vue_handles; > + > + gs.UserClipDistanceClipTestEnableBitmask = > + transform->ClipPlanesEnabled; > + gs.UserClipDistanceCullTestEnableBitmask = > + vue_prog_data->cull_distance_mask; > + Drop ClipTestEnableBitmask (but leave CullTestEnableBitmask) - it was was removed in commit 903056e016e3ea52c2f493f8b0938b519ee40894. > + const int urb_entry_write_offset = 1; > + const uint32_t urb_entry_output_length = > + DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) - > + urb_entry_write_offset; > + > + gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset; > + gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1); > +#endif > + } > +#if GEN_GEN < 7 > + } else if (brw->ff_gs.prog_active) { > + /* In gen6, transform feedback for the VS stage is done with an ad-hoc > GS > + * program. This function provides the needed 3DSTATE_GS for this. > + */ > + upload_gs_state_for_tf(brw); > +#endif > + } else { > + brw_batch_emit(brw, GENX(3DSTATE_GS), gs) { > + gs.StatisticsEnable = true; > +#if GEN_GEN < 7 > + gs.RenderingEnabled = true; > +#endif > + > +#if GEN_GEN < 8 > + gs.DispatchGRFStartRegisterForURBData = 1; > +#if GEN_GEN >= 7 > + gs.IncludeVertexHandles = true; > +#endif > +#endif > + } > + } > +#if GEN_GEN < 7 > + brw->gs.enabled = active; > +#endif > +} > + > +static const struct brw_tracked_state genX(gs_state) = { > + .dirty = { > + .mesa = (GEN_GEN < 8 ? _NEW_TRANSFORM : 0) | We can drop _NEW_TRANSFORM. > + (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0), > + .brw = BRW_NEW_BATCH | > + BRW_NEW_BLORP | > + BRW_NEW_CONTEXT | > + BRW_NEW_GEOMETRY_PROGRAM | > + BRW_NEW_GS_PROG_DATA | > + (GEN_GEN < 7 ? BRW_NEW_FF_GS_PROG_DATA | > + BRW_NEW_PUSH_CONSTANT_ALLOCATION > + : 0), > + }, > + .emit = genX(upload_gs_state), > +}; > + > #endif > > /* ---------------------------------------------------------------------- */ > @@ -1645,6 +1985,99 @@ static const struct brw_tracked_state genX(ps_state) = > { > .emit = genX(upload_ps), > }; > > +/* ---------------------------------------------------------------------- */ > + > +static void > +genX(upload_hs_state)(struct brw_context *brw) > +{ > + const struct gen_device_info *devinfo = &brw->screen->devinfo; > + struct brw_stage_state *stage_state = &brw->tcs.base; > + struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data; > + const struct brw_vue_prog_data *vue_prog_data = > + brw_vue_prog_data(stage_prog_data); > + > + /* BRW_NEW_TES_PROG_DATA */ > + struct brw_tcs_prog_data *tcs_prog_data = > + brw_tcs_prog_data(stage_prog_data); > + > + if (!tcs_prog_data) { > + brw_batch_emit(brw, GENX(3DSTATE_HS), hs); > + } else { > + brw_batch_emit(brw, GENX(3DSTATE_HS), hs) { > + INIT_THREAD_DISPATCH_FIELDS(hs, Vertex); > + > + hs.InstanceCount = tcs_prog_data->instances - 1; > + hs.IncludeVertexHandles = true; > + > + hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; Indentation is off here. > + } > + } > +} > + > +static const struct brw_tracked_state genX(hs_state) = { > + .dirty = { > + .mesa = 0, > + .brw = BRW_NEW_BATCH | > + BRW_NEW_BLORP | > + BRW_NEW_TCS_PROG_DATA | > + BRW_NEW_TESS_PROGRAMS, > + }, > + .emit = genX(upload_hs_state), > +}; > + > +static void > +genX(upload_ds_state)(struct brw_context *brw) > +{ > + const struct gen_device_info *devinfo = &brw->screen->devinfo; > + const struct brw_stage_state *stage_state = &brw->tes.base; > + struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data; > + > + /* BRW_NEW_TES_PROG_DATA */ > + const struct brw_tes_prog_data *tes_prog_data = > + brw_tes_prog_data(stage_prog_data); > + const struct brw_vue_prog_data *vue_prog_data = > + brw_vue_prog_data(stage_prog_data); > + > +#if GEN_GEN >= 8 > + /* _NEW_TRANSFORM */ > + struct gl_context *ctx = &brw->ctx; > + const struct gl_transform_attrib *transform = &ctx->Transform; > +#endif > + > + if (!tes_prog_data) { > + brw_batch_emit(brw, GENX(3DSTATE_DS), ds); > + } else { > + brw_batch_emit(brw, GENX(3DSTATE_DS), ds) { > + INIT_THREAD_DISPATCH_FIELDS(ds, Patch); > + > + ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; > + ds.ComputeWCoordinateEnable = > + tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; > + > +#if GEN_GEN >= 8 > + if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8) > + ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; > + ds.UserClipDistanceClipTestEnableBitmask = > + transform->ClipPlanesEnabled; > + ds.UserClipDistanceCullTestEnableBitmask = > + vue_prog_data->cull_distance_mask; Drop ClipTestEnableBitmask (but leave CullTestEnableBitmask) - it was was removed in commit 903056e016e3ea52c2f493f8b0938b519ee40894. That means we can drop the _NEW_TRANSFORM stuff above, and the dirty bit. > +#endif > + } > + } > +} > + > +static const struct brw_tracked_state genX(ds_state) = { > + .dirty = { > + .mesa = (GEN_GEN < 8 ? _NEW_TRANSFORM : 0), (This can just be 0. We were setting it on Gen7-7.5, but there's no reason for that. I'll send a patch...) > + .brw = BRW_NEW_BATCH | > + BRW_NEW_BLORP | > + BRW_NEW_TESS_PROGRAMS | > + BRW_NEW_TES_PROG_DATA | > + (GEN_GEN < 8 ? BRW_NEW_CONTEXT : 0), There's no point in BRW_NEW_CONTEXT - BRW_NEW_BATCH already covers 100% of those cases. > + }, > + .emit = genX(upload_ds_state), > +}; > + > #endif > > /* ---------------------------------------------------------------------- */
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev