On Mon, Jul 25, 2016 at 5:42 PM, Rob Clark <robdcl...@gmail.com> wrote: > On Mon, Jul 25, 2016 at 11:16 AM, Brian Paul <bri...@vmware.com> wrote: >> On 07/18/2016 07:11 AM, Marek Olšák wrote: >>> >>> From: Marek Olšák <marek.ol...@amd.com> >>> >>> The goal is to do this in st_validate_state: >>> while (dirty) >>> atoms[u_bit_scan(&dirty)]->update(st); >>> >>> That implies that atoms can't specify which flags they consume. >>> There is exactly one ST_NEW_* flag for each atom. (58 flags in total) >>> >>> There are macros that combine multiple flags into one for easier use. >>> >>> All _NEW_* flags are translated into ST_NEW_* flags in >>> st_invalidate_state. >>> st/mesa doesn't keep the _NEW_* flags after that. >>> >>> torcs is 2% faster between the previous patch and the end of this series. >>> --- >>> src/mesa/state_tracker/st_atom.c | 153 +++++------------- >>> src/mesa/state_tracker/st_atom.h | 210 >>> +++++++++++++++++-------- >>> src/mesa/state_tracker/st_atom_array.c | 4 - >>> src/mesa/state_tracker/st_atom_atomicbuf.c | 24 --- >>> src/mesa/state_tracker/st_atom_blend.c | 4 - >>> src/mesa/state_tracker/st_atom_clip.c | 4 - >>> src/mesa/state_tracker/st_atom_constbuf.c | 48 ------ >>> src/mesa/state_tracker/st_atom_depth.c | 4 - >>> src/mesa/state_tracker/st_atom_framebuffer.c | 4 - >>> src/mesa/state_tracker/st_atom_image.c | 24 --- >>> src/mesa/state_tracker/st_atom_list.h | 75 +++++++++ >>> src/mesa/state_tracker/st_atom_msaa.c | 8 - >>> src/mesa/state_tracker/st_atom_pixeltransfer.c | 4 - >>> src/mesa/state_tracker/st_atom_rasterizer.c | 16 -- >>> src/mesa/state_tracker/st_atom_sampler.c | 4 - >>> src/mesa/state_tracker/st_atom_scissor.c | 8 - >>> src/mesa/state_tracker/st_atom_shader.c | 24 --- >>> src/mesa/state_tracker/st_atom_stipple.c | 5 - >>> src/mesa/state_tracker/st_atom_storagebuf.c | 24 --- >>> src/mesa/state_tracker/st_atom_tess.c | 4 - >>> src/mesa/state_tracker/st_atom_texture.c | 24 --- >>> src/mesa/state_tracker/st_atom_viewport.c | 4 - >>> src/mesa/state_tracker/st_cb_bitmap.c | 10 +- >>> src/mesa/state_tracker/st_cb_bufferobjects.c | 10 +- >>> src/mesa/state_tracker/st_cb_compute.c | 2 +- >>> src/mesa/state_tracker/st_cb_feedback.c | 2 +- >>> src/mesa/state_tracker/st_cb_program.c | 38 ++--- >>> src/mesa/state_tracker/st_cb_texture.c | 2 +- >>> src/mesa/state_tracker/st_context.c | 100 ++++++++++-- >>> src/mesa/state_tracker/st_context.h | 42 +---- >>> src/mesa/state_tracker/st_draw.c | 4 +- >>> src/mesa/state_tracker/st_manager.c | 4 +- >>> 32 files changed, 377 insertions(+), 516 deletions(-) >>> create mode 100644 src/mesa/state_tracker/st_atom_list.h >>> >>> diff --git a/src/mesa/state_tracker/st_atom.c >>> b/src/mesa/state_tracker/st_atom.c >>> index 9d5cc0f..5843d2a 100644 >>> --- a/src/mesa/state_tracker/st_atom.c >>> +++ b/src/mesa/state_tracker/st_atom.c >>> @@ -37,87 +37,18 @@ >>> #include "st_manager.h" >>> >>> >>> -/** >>> - * This is used to initialize st->render_atoms[]. >>> - */ >>> -static const struct st_tracked_state *render_atoms[] = >>> -{ >>> - &st_update_depth_stencil_alpha, >>> - &st_update_clip, >>> - >>> - &st_update_fp, >>> - &st_update_gp, >>> - &st_update_tep, >>> - &st_update_tcp, >>> - &st_update_vp, >>> - >>> - &st_update_rasterizer, >>> - &st_update_polygon_stipple, >>> - &st_update_viewport, >>> - &st_update_scissor, >>> - &st_update_window_rectangles, >>> - &st_update_blend, >>> - &st_update_vertex_texture, >>> - &st_update_fragment_texture, >>> - &st_update_geometry_texture, >>> - &st_update_tessctrl_texture, >>> - &st_update_tesseval_texture, >>> - &st_update_sampler, /* depends on update_*_texture for swizzle */ >>> - &st_bind_vs_images, >>> - &st_bind_tcs_images, >>> - &st_bind_tes_images, >>> - &st_bind_gs_images, >>> - &st_bind_fs_images, >>> - &st_update_framebuffer, /* depends on update_*_texture and >>> bind_*_images */ >>> - &st_update_msaa, >>> - &st_update_sample_shading, >>> - &st_update_vs_constants, >>> - &st_update_tcs_constants, >>> - &st_update_tes_constants, >>> - &st_update_gs_constants, >>> - &st_update_fs_constants, >>> - &st_bind_vs_ubos, >>> - &st_bind_tcs_ubos, >>> - &st_bind_tes_ubos, >>> - &st_bind_fs_ubos, >>> - &st_bind_gs_ubos, >>> - &st_bind_vs_atomics, >>> - &st_bind_tcs_atomics, >>> - &st_bind_tes_atomics, >>> - &st_bind_fs_atomics, >>> - &st_bind_gs_atomics, >>> - &st_bind_vs_ssbos, >>> - &st_bind_tcs_ssbos, >>> - &st_bind_tes_ssbos, >>> - &st_bind_fs_ssbos, >>> - &st_bind_gs_ssbos, >>> - &st_update_pixel_transfer, >>> - &st_update_tess, >>> - >>> - /* this must be done after the vertex program update */ >>> - &st_update_array >>> -}; >>> - >>> - >>> -/** >>> - * This is used to initialize st->compute_atoms[]. >>> - */ >>> -static const struct st_tracked_state *compute_atoms[] = >>> +/* The list state update functions. */ >>> +static const struct st_tracked_state *atoms[] = >>> { >>> - &st_update_cp, >>> - &st_update_compute_texture, >>> - &st_update_sampler, /* depends on update_compute_texture for swizzle >>> */ >>> - &st_update_cs_constants, >>> - &st_bind_cs_ubos, >>> - &st_bind_cs_atomics, >>> - &st_bind_cs_ssbos, >>> - &st_bind_cs_images, >>> +#define ST_STATE(FLAG, st_update) &st_update, >>> +#include "st_atom_list.h" >>> +#undef ST_STATE >>> }; >>> >>> >>> void st_init_atoms( struct st_context *st ) >>> { >>> - /* no-op */ >>> + STATIC_ASSERT(ARRAY_SIZE(atoms) <= 64); >>> } >>> >>> >>> @@ -127,13 +58,6 @@ void st_destroy_atoms( struct st_context *st ) >>> } >>> >>> >>> - >>> -static bool >>> -check_state(const struct st_state_flags *a, const struct st_state_flags >>> *b) >>> -{ >>> - return (a->mesa & b->mesa) || (a->st & b->st); >>> -} >>> - >>> /* Too complex to figure out, just check every time: >>> */ >>> static void check_program_state( struct st_context *st ) >>> @@ -141,13 +65,13 @@ static void check_program_state( struct st_context >>> *st ) >>> struct gl_context *ctx = st->ctx; >>> >>> if (ctx->VertexProgram._Current != &st->vp->Base) >>> - st->dirty.st |= ST_NEW_VERTEX_PROGRAM; >>> + st->dirty |= ST_NEW_VERTEX_PROGRAM; >>> >>> if (ctx->FragmentProgram._Current != &st->fp->Base) >>> - st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; >>> + st->dirty |= ST_NEW_FRAGMENT_PROGRAM; >>> >>> if (ctx->GeometryProgram._Current != &st->gp->Base) >>> - st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM; >>> + st->dirty |= ST_NEW_GEOMETRY_PROGRAM; >>> } >>> >>> static void check_attrib_edgeflag(struct st_context *st) >>> @@ -165,14 +89,14 @@ static void check_attrib_edgeflag(struct st_context >>> *st) >>> arrays[VERT_ATTRIB_EDGEFLAG]->StrideB != 0; >>> if (vertdata_edgeflags != st->vertdata_edgeflags) { >>> st->vertdata_edgeflags = vertdata_edgeflags; >>> - st->dirty.st |= ST_NEW_VERTEX_PROGRAM; >>> + st->dirty |= ST_NEW_VERTEX_PROGRAM; >>> } >>> >>> edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags && >>> >>> !st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0]; >>> if (edgeflag_culls_prims != st->edgeflag_culls_prims) { >>> st->edgeflag_culls_prims = edgeflag_culls_prims; >>> - st->dirty.st |= ST_NEW_RASTERIZER; >>> + st->dirty |= ST_NEW_RASTERIZER; >>> } >>> } >>> >>> @@ -183,49 +107,42 @@ static void check_attrib_edgeflag(struct st_context >>> *st) >>> >>> void st_validate_state( struct st_context *st, enum st_pipeline pipeline >>> ) >>> { >>> - const struct st_tracked_state **atoms; >>> - struct st_state_flags *state; >>> - GLuint num_atoms; >>> - GLuint i; >>> + uint64_t dirty, pipeline_mask; >>> + uint32_t dirty_lo, dirty_hi; >>> + >>> + /* Get Mesa driver state. */ >>> + st->dirty |= st->ctx->NewDriverState & ST_ALL_STATES_MASK; >>> + st->ctx->NewDriverState = 0; >>> >>> /* Get pipeline state. */ >>> switch (pipeline) { >>> - case ST_PIPELINE_RENDER: >>> - atoms = render_atoms; >>> - num_atoms = ARRAY_SIZE(render_atoms); >>> - state = &st->dirty; >>> + case ST_PIPELINE_RENDER: >>> + check_attrib_edgeflag(st); >>> + check_program_state(st); >>> + st_manager_validate_framebuffers(st); >>> + >>> + pipeline_mask = ST_PIPELINE_RENDER_STATE_MASK; >>> break; >>> case ST_PIPELINE_COMPUTE: >>> - atoms = compute_atoms; >>> - num_atoms = ARRAY_SIZE(compute_atoms); >>> - state = &st->dirty_cp; >>> + pipeline_mask = ST_PIPELINE_COMPUTE_STATE_MASK; >>> break; >>> default: >>> unreachable("Invalid pipeline specified"); >>> } >>> >>> - /* Get Mesa driver state. */ >>> - st->dirty.st |= st->ctx->NewDriverState; >>> - st->dirty_cp.st |= st->ctx->NewDriverState; >>> - st->ctx->NewDriverState = 0; >>> - >>> - if (pipeline == ST_PIPELINE_RENDER) { >>> - check_attrib_edgeflag(st); >>> - >>> - check_program_state(st); >>> - >>> - st_manager_validate_framebuffers(st); >>> - } >>> - >>> - if (state->st == 0 && state->mesa == 0) >>> + dirty = st->dirty & pipeline_mask; >>> + if (!dirty) >>> return; >>> >>> - /*printf("%s %x/%x\n", __func__, state->mesa, state->st);*/ >>> + dirty_lo = dirty; >>> + dirty_hi = dirty >> 32; >>> >>> - for (i = 0; i < num_atoms; i++) { >>> - if (check_state(state, &atoms[i]->dirty)) >>> - atoms[i]->update( st ); >>> - } >>> + /* Update states. */ >>> + while (dirty_lo) >>> + atoms[u_bit_scan(&dirty_lo)]->update(st); >>> + while (dirty_hi) >>> + atoms[32 + u_bit_scan(&dirty_hi)]->update(st); >>> >> >> Could we just use the u_bit_scan64() function and void the hi/lo split? > > fwiw, we actually did discuss that on irc, but I guess no one > summarized on email thread.. > > Marek's concern was that would generate worse code on 32b since it > would pull the right-shift into the loop. > > I'm not entirely sure if gcc would be clever enough in this case or > not. I guess someone needs to compare generated asm in both cases. > And either use u_bit_scan64() if the compiler is clever enough, or add > a comment explaining the reason.
Yeah, I added this comment before the loops: "Don't use u_bit_scan64, it may be slower on 32-bit." On 32-bit, ffsll is an if-then-else expression with some arithmetic and shifting one bit to the left is another if-then-else expression. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev