[Mesa-dev] [PATCH v2] i965/fs: Add remove_extra_rounding_modes optimization
Although from SPIR-V point of view, rounding modes are attached to the operation/destination, on i965 it is a status, so we don't need to explicitly set the rounding mode if the one we want is already set. v2: Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate with the rounding mode (Curro) Signed-off-by: Jose Maria Casanova Crespo Signed-off-by: Alejandro Piñeiro opcode == SHADER_OPCODE_RND_MODE) { + assert(inst->src[0].file == BRW_IMMEDIATE_VALUE); + + current_mode = (brw_rnd_mode) inst->src[0].d; + + if (current_mode == prev_mode) { +inst->remove(block); +progress = true; + } else { +prev_mode = current_mode; + } + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + + static void clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len) { @@ -5748,6 +5788,7 @@ fs_visitor::optimize() int pass_num = 0; OPT(opt_drop_redundant_mov_to_flags); + OPT(remove_extra_rounding_modes); do { progress = false; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index f1ba193de7e..b9476e69edb 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -150,6 +150,7 @@ public: bool eliminate_find_live_channel(); bool dead_code_eliminate(); bool remove_duplicate_mrf_writes(); + bool remove_extra_rounding_modes(); bool opt_sampler_eot(); bool virtual_grf_interferes(int a, int b); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] i965/fs: Enable rounding mode on f2f16 ops
By default we don't set the rounding mode. We only set round-to-near-even or round-to-zero mode if explicitly set from nir. v2: Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate with the rounding mode (Curro) Signed-off-by: Jose Maria Casanova Crespo Signed-off-by: Alejandro Piñeiro --- src/intel/compiler/brw_fs_nir.cpp | 8 1 file changed, 8 insertions(+) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index a356327c690..7b06cb139d6 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -717,6 +717,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; + case nir_op_f2f16_rtne: + case nir_op_f2f16_rtz: + if (instr->op == nir_op_f2f16_rtz) + bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), brw_imm_d(BRW_RND_MODE_RTZ)); + else if (instr->op == nir_op_f2f16_rtne) + bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), brw_imm_d(BRW_RND_MODE_RTNE)); + /* fallthrough */ + case nir_op_f2f16: /* In theory, it would be better to use BRW_OPCODE_F32TO16. Depending * on the HW gen, it is a special hw opcode or just a MOV, and -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] i965/fs: Define new shader opcode to set rounding modes
Although it is possible to emit them directly as AND/OR on brw_fs_nir, having a specific opcode makes it easier to remove duplicate settings later. v2: (Curro) - Set thread control to 'switch' when using the control register - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate with the rounding mode. - Avoid magic numbers setting rounding mode field at control register. Signed-off-by: Alejandro Piñeiro Signed-off-by: Jose Maria Casanova Crespo --- src/intel/compiler/brw_eu.h | 3 +++ src/intel/compiler/brw_eu_defines.h | 17 + src/intel/compiler/brw_eu_emit.c| 34 + src/intel/compiler/brw_fs_generator.cpp | 5 + src/intel/compiler/brw_shader.cpp | 4 5 files changed, 63 insertions(+) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 8e597b212a6..106bf03530d 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p, struct brw_reg src, struct brw_reg idx); +void +brw_rounding_mode(struct brw_codegen *p, + enum brw_rnd_mode mode); /*** * brw_eu_util.c: */ diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index da482b73c58..91d88fe8952 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -388,6 +388,9 @@ enum opcode { SHADER_OPCODE_TYPED_SURFACE_WRITE, SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, + + SHADER_OPCODE_RND_MODE, + SHADER_OPCODE_MEMORY_FENCE, SHADER_OPCODE_GEN4_SCRATCH_READ, @@ -1214,4 +1217,18 @@ enum brw_message_target { /* R0 */ # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 +/* CR0.0[5:4] Floating-Point Rounding Modes + * Skylake PRM, Volume 7 Part 1, "Control Register", page 756 + */ + +#define BRW_CR0_RND_MODE_MASK 0x30 +#define BRW_CR0_RND_MODE_SHIFT4 + +enum PACKED brw_rnd_mode { + BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ + BRW_RND_MODE_RU = 1,/* Round Up, toward +inf */ + BRW_RND_MODE_RD = 2,/* Round Down, toward -inf */ + BRW_RND_MODE_RTZ = 3/* Round Toward Zero */ +}; + #endif /* BRW_EU_DEFINES_H */ diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 8c952e7da26..12164653e47 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3530,3 +3530,37 @@ brw_WAIT(struct brw_codegen *p) brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); } + +/** + * Changes the floating point rounding mode updating the control register + * field defined at cr0.0[5-6] bits. This function supports the changes to + * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations. + * Only RTNE and RTZ rounding are enabled at nir. + */ + +void +brw_rounding_mode(struct brw_codegen *p, + enum brw_rnd_mode mode) +{ + const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT; + + if (bits != BRW_CR0_RND_MODE_MASK) { + brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(~BRW_CR0_RND_MODE_MASK)); + + /* From the Skylake PRM, Volume 7, page 760: + * "Implementation Restriction on Register Access: When the control + * register is used as an explicit source and/or destination, hardware + * does not ensure execution pipeline coherency. Software must set the + * thread control field to ‘switch’ for an instruction that uses + * control register as an explicit operand." + */ + brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); +} + + if (bits) { + brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(bits)); + brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); + } +} diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index afaec5c9497..ff9880ebfe8 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2144,6 +2144,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F)); break; + case SHADER_OPCODE_RND_MODE: + assert(src[0].file == BRW_IMMEDIATE_VALUE); + brw_rounding_mode(p, (brw_rnd_mode) src[0].d); + break; + default: unreachable("Unsupported opcode"); diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index c62b8ba6140..19dd960be3a 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -486,6 +486,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opc
[Mesa-dev] [Bug 98691] egl.h:55: error: redefinition of typedef ‘EGLDisplay’
https://bugs.freedesktop.org/show_bug.cgi?id=98691 Vinson Lee changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #2 from Vinson Lee --- commit 4e97084591c206338af4425c33edb833de348816 Author: Matt Turner Date: Thu Jul 6 18:40:53 2017 -0700 egl: Fix inclusion of egl.h+mesa_glinterop.h Previously clang would warn about redefinition of typedef EGLDisplay. Avoid this by adding preprocessor guards to mesa_glinterop.h and including it after EGL.h is indirectly included. Reviewed-by: Jordan Justen -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] egl/dri2: only destroy created objects
On 08/30/2017 06:50 AM, Tapani Pälli wrote: On 08/29/2017 12:51 PM, Michael Olbrich wrote: dri2_display_destroy may be called by dri2_initialize_wayland_drm() if initialization fails. In this case, these objects may not be initialized. Same thing can happen with dri2_initialize_wayland_swrast, would be good to fix both at one go. and that is actually what the patch does, both call dri2_display_destroy the same way :) Reviewed-by: Tapani Pälli Signed-off-by: Michael Olbrich --- src/egl/drivers/dri2/egl_dri2.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index a197e0456fd2..d86c6af225fc 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -944,9 +944,12 @@ dri2_display_destroy(_EGLDisplay *disp) zwp_linux_dmabuf_v1_destroy(dri2_dpy->wl_dmabuf); if (dri2_dpy->wl_shm) wl_shm_destroy(dri2_dpy->wl_shm); - wl_registry_destroy(dri2_dpy->wl_registry); - wl_event_queue_destroy(dri2_dpy->wl_queue); - wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper); + if (dri2_dpy->wl_registry) + wl_registry_destroy(dri2_dpy->wl_registry); + if (dri2_dpy->wl_queue) + wl_event_queue_destroy(dri2_dpy->wl_queue); + if (dri2_dpy->wl_dpy_wrapper) + wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper); u_vector_finish(&dri2_dpy->wl_modifiers.argb); u_vector_finish(&dri2_dpy->wl_modifiers.xrgb); u_vector_finish(&dri2_dpy->wl_modifiers.rgb565); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] egl/dri2: only destroy created objects
On 08/29/2017 12:51 PM, Michael Olbrich wrote: dri2_display_destroy may be called by dri2_initialize_wayland_drm() if initialization fails. In this case, these objects may not be initialized. Same thing can happen with dri2_initialize_wayland_swrast, would be good to fix both at one go. Signed-off-by: Michael Olbrich --- src/egl/drivers/dri2/egl_dri2.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index a197e0456fd2..d86c6af225fc 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -944,9 +944,12 @@ dri2_display_destroy(_EGLDisplay *disp) zwp_linux_dmabuf_v1_destroy(dri2_dpy->wl_dmabuf); if (dri2_dpy->wl_shm) wl_shm_destroy(dri2_dpy->wl_shm); - wl_registry_destroy(dri2_dpy->wl_registry); - wl_event_queue_destroy(dri2_dpy->wl_queue); - wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper); + if (dri2_dpy->wl_registry) + wl_registry_destroy(dri2_dpy->wl_registry); + if (dri2_dpy->wl_queue) + wl_event_queue_destroy(dri2_dpy->wl_queue); + if (dri2_dpy->wl_dpy_wrapper) + wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper); u_vector_finish(&dri2_dpy->wl_modifiers.argb); u_vector_finish(&dri2_dpy->wl_modifiers.xrgb); u_vector_finish(&dri2_dpy->wl_modifiers.rgb565); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 101709] [llvmpipe] piglit gl-1.0-scissor-offscreen regression
https://bugs.freedesktop.org/show_bug.cgi?id=101709 Brian Paul changed: What|Removed |Added Resolution|--- |FIXED Status|NEW |RESOLVED --- Comment #4 from Brian Paul --- Fixed w/ commit 88cdf16871a0f1cd8ec3844072051ee38e945600 -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] llvmpipe: initialize llvmpipe->dirty with LP_NEW_SCISSOR
On 08/29/2017 06:17 PM, Roland Scheidegger wrote: Looks good to me, thanks. Reviewed-by: Roland Scheidegger Albeit I'm not quite sure why it is never set here? This particular test sets a 0 x 0 scissor region. All the pipe_scissor_state members are zero and when the state tracker does a memcmp() with its copy it sees no difference and skips the call to pipe->set_scissor_state(). -Brian This bug looks similar in nature to the uninitialized fb (see 8bfe451ed30918244618608871423b2a72cf9767) and I thought that was impossible to hit with gl state tracker... Roland Am 28.08.2017 um 22:20 schrieb Brian Paul: If llvmpipe_set_scissor_states() is never called, we still need to be sure that derived scissor/clip state is updated. As of commit 743ad599a97d09b1 that function might not be called. Fixes regressed Piglit gl-1.0-scissor-offscreen -fbo -auto test. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101709 --- src/gallium/drivers/llvmpipe/lp_context.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 599a9f1..613d60f 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -227,6 +227,12 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv, lp_reset_counters(); + /* If llvmpipe_set_scissor_states() is never called, we still need to +* make sure that derived scissor state is computed. +* See https://bugs.freedesktop.org/show_bug.cgi?id=101709 +*/ + llvmpipe->dirty |= LP_NEW_SCISSOR; + return &llvmpipe->pipe; fail: ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH v2 1/2] gallivm: correct channel shift logic on big endian
On Tue, Aug 29, 2017 at 1:13 PM, Roland Scheidegger wrote: > That said, I already reviewed this: > https://lists.freedesktop.org/archives/mesa-dev/2017-August/167515.html Since I don't think Ben has commit access, it seems that you should be the one to commit it as well. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] radeonsi: ensure cache flushes happen before SET_PREDICATION packets
For the series: Reviewed-by: Marek Olšák Marek On Fri, Aug 25, 2017 at 4:40 PM, Nicolai Hähnle wrote: > From: Nicolai Hähnle > > The data is read when the render_cond_atom is emitted, so we must > delay emitting the atom until after the flush. > > Fixes: 0fe0320dc074 ("radeonsi: use optimal packet order when doing a > pipeline sync") > --- > src/gallium/drivers/radeon/r600_pipe_common.h | 3 ++- > src/gallium/drivers/radeon/r600_query.c | 9 ++--- > src/gallium/drivers/radeonsi/si_state_draw.c | 15 ++- > 3 files changed, 18 insertions(+), 9 deletions(-) > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h > b/src/gallium/drivers/radeon/r600_pipe_common.h > index dca56734cd7..f78e38b65af 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.h > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h > @@ -54,21 +54,22 @@ struct u_log_context; > #define R600_RESOURCE_FLAG_TRANSFER(PIPE_RESOURCE_FLAG_DRV_PRIV > << 0) > #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV > << 1) > #define R600_RESOURCE_FLAG_FORCE_TILING > (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) > #define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV > << 3) > #define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV > << 4) > > #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) > /* Pipeline & streamout query controls. */ > #define R600_CONTEXT_START_PIPELINE_STATS (1u << 1) > #define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2) > -#define R600_CONTEXT_PRIVATE_FLAG (1u << 3) > +#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3) > +#define R600_CONTEXT_PRIVATE_FLAG (1u << 4) > > /* special primitive types */ > #define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX > > #define R600_NOT_QUERY 0x > > /* Debug flags. */ > /* logging and features */ > #define DBG_TEX(1 << 0) > #define DBG_NIR(1 << 1) > diff --git a/src/gallium/drivers/radeon/r600_query.c > b/src/gallium/drivers/radeon/r600_query.c > index f937612bc1f..03ff1018a71 100644 > --- a/src/gallium/drivers/radeon/r600_query.c > +++ b/src/gallium/drivers/radeon/r600_query.c > @@ -1828,25 +1828,28 @@ static void r600_render_condition(struct pipe_context > *ctx, > * from launching the compute grid. > */ > rctx->render_cond = NULL; > > ctx->get_query_result_resource( > ctx, query, true, PIPE_QUERY_TYPE_U64, 0, > &rquery->workaround_buf->b.b, > rquery->workaround_offset); > > /* Settings this in the render cond atom is too late, > * so set it here. */ > - rctx->flags |= rctx->screen->barrier_flags.L2_to_cp; > - > - atom->num_dw = 5; > + rctx->flags |= rctx->screen->barrier_flags.L2_to_cp | > + R600_CONTEXT_FLUSH_FOR_RENDER_COND; > > rctx->render_cond_force_off = old_force_off; > + } > + > + if (needs_workaround) { > + atom->num_dw = 5; > } else { > for (qbuf = &rquery->buffer; qbuf; qbuf = > qbuf->previous) > atom->num_dw += (qbuf->results_end / > rquery->result_size) * 5; > > if (rquery->b.type == > PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) > atom->num_dw *= R600_MAX_STREAMS; > } > } > > rctx->render_cond = query; > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c > b/src/gallium/drivers/radeonsi/si_state_draw.c > index 1d8be49a480..81751d2186e 100644 > --- a/src/gallium/drivers/radeonsi/si_state_draw.c > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c > @@ -1385,34 +1385,39 @@ void si_draw_vbo(struct pipe_context *ctx, const > struct pipe_draw_info *info) > SI_CONTEXT_PS_PARTIAL_FLUSH | > SI_CONTEXT_CS_PARTIAL_FLUSH))) { > /* If we have to wait for idle, set all states first, so that > all > * SET packets are processed in parallel with previous draw > calls. > * Then upload descriptors, set shader pointers, and draw, and > * prefetch at the end. This ensures that the time the CUs > * are idle is very short. (there are only SET_SH packets > between > * the wait and the draw) > */ > struct r600_atom *shader_pointers = > &sctx->shader_pointers.atom; > + unsigned masked_atoms = 1u << shader_pointers->id; > > - /* Emit all states except shader pointers
[Mesa-dev] [PATCH] egl/dri2: only destroy created objects
dri2_display_destroy may be called by dri2_initialize_wayland_drm() if initialization fails. In this case, these objects may not be initialized. Signed-off-by: Michael Olbrich --- src/egl/drivers/dri2/egl_dri2.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index a197e0456fd2..d86c6af225fc 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -944,9 +944,12 @@ dri2_display_destroy(_EGLDisplay *disp) zwp_linux_dmabuf_v1_destroy(dri2_dpy->wl_dmabuf); if (dri2_dpy->wl_shm) wl_shm_destroy(dri2_dpy->wl_shm); - wl_registry_destroy(dri2_dpy->wl_registry); - wl_event_queue_destroy(dri2_dpy->wl_queue); - wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper); + if (dri2_dpy->wl_registry) + wl_registry_destroy(dri2_dpy->wl_registry); + if (dri2_dpy->wl_queue) + wl_event_queue_destroy(dri2_dpy->wl_queue); + if (dri2_dpy->wl_dpy_wrapper) + wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper); u_vector_finish(&dri2_dpy->wl_modifiers.argb); u_vector_finish(&dri2_dpy->wl_modifiers.xrgb); u_vector_finish(&dri2_dpy->wl_modifiers.rgb565); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] V2 radeonsi use STD430 packing of UBOs by default
I have to conclude that I don't see a way to use LOAD with CONSTBUF and keep the same performance as before. It looks like there are some deficiencies in our compiler stack that are unfixable in Mesa alone. Marek On Wed, Aug 30, 2017 at 2:11 AM, Marek Olšák wrote: > Related IRC discussion: > > 00:01 < mareko> arsenm: what are the chances I can convince you to > allow me to set mayLoad = 0 on s_buffer_load_dword? :) the instruction > always reads from read-only memory with Mesa > 00:02 < mareko> apparently, readnone doesn't get through > 00:02 < arsenm> mareko: you should get the same effect by having > invariant on the MMO > 00:03 < mareko> arsenm: and how would I set invariant on SI.load.const? > 00:04 < arsenm> mareko: we create MMOs for a few other intrinsics > already, it should be the same > 00:05 < mareko> if only I had time to play with LLVM > 00:05 < arsenm> mareko: it looks like that is already done so it might > be a more specific problem > 00:05 < arsenm> that rematerializable scalar loads patch is probably > OK now though > 00:07 < arsenm> https://reviews.llvm.org/D11621 > > Marek > > > On Wed, Aug 30, 2017 at 1:58 AM, Marek Olšák wrote: >> Interesting. It may be that glsl_to_tgsi uses copy propagation to fold >> those CONST loads into operands, which puts them next to their uses in LLVM. >> >> I guess LLVM doesn't understand that s_buffer_load_dword loads from >> immutable dereferenceable memory. It would benefit from mayLoad = 0 in >> this case I think. >> >> Marek >> >> On Thu, Aug 24, 2017 at 11:48 AM, Timothy Arceri >> wrote: >>> >>> >>> On 24/08/17 18:12, Nicolai Hähnle wrote: On 24.08.2017 09:45, Timothy Arceri wrote: > > > > On 22/08/17 22:14, Timothy Arceri wrote: >> >> I'm a little unsure what to do with this now. Below is my shader-db >> results, the majority of negative changes are from Natural Selection >> 2. >> >> I looked at some dumps of the worst Natural Selection 2 shaders and >> it seems to just be scheduling differences causing the regressions. >> >> I tested with sisched but that just made things even worse. >> >> Obviously we should be aiming to improve the schedulare, but since >> this regresses things and I have no evidence of it helping anything >> it makes the case for adding it pretty weak. >> >> Thoughts?? >> >> PERCENTAGE DELTASShaders SGPRs VGPRs SpillSGPR MaxWaves >> >> All affected57972.92 3.05 %5.04 % -2.94 >> --- >> Total 722870.28 %0.34 %0.33 % -0.21 % >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > > > As far as I can tell this is because after this chnage we end up with > large sections of consecutive loads. Any thoughts on avoid this? Odd. Do you see the same change in TGSI? This is one of those things that ideally LLVM would be smart about, but unfortunately it isn't really. >>> >>> >>> Yeah I assume it's very doable since SSA makes this stuff reasonably easy to >>> deal with. However I'm not really sure where to begin, or how welcome a pass >>> to do this sorting would be. We have a similar pass in nir for moving >>> comparisons to where they are first used. >>> >>> The TGSI is introduces an extra temp to store the value of the LOAD, this is >>> probably what triggers the difference in LLVM. >>> >>> eg. >>> >>> LOAD TEMP[61], UBO[2], IMM[2]. >>> LOAD TEMP[62], UBO[2], IMM[1]. >>> LOAD TEMP[63], UBO[2], IMM[1]. >>> LOAD TEMP[64], UBO[2], IMM[2]. >>> DP4 TEMP[65].x, TEMP[60], TEMP[61] >>> DP4 TEMP[66].x, TEMP[60], TEMP[62] >>> MOV TEMP[65].y, TEMP[66]. >>> DP4 TEMP[67].x, TEMP[60], TEMP[63] >>> MOV TEMP[65].z, TEMP[67]. >>> DP4 TEMP[68].x, TEMP[60], TEMP[64] >>> MOV TEMP[69].w, TEMP[68]. >>> MOV TEMP[69].xyz, TEMP[65].xyzx >>> LOAD TEMP[70], UBO[1], IMM[6]. >>> LOAD TEMP[71], UBO[1], IMM[6]. >>> DP4 TEMP[72].x, TEMP[69], TEMP[70] >>> DP4 TEMP[73].x, TEMP[69], TEMP[71] >>> LOAD TEMP[74], UBO[1], IMM[6]. >>> LOAD TEMP[75], UBO[1], IMM[7]. >>> LOAD TEMP[76], UBO[1], IMM[7]. >>> LOAD TEMP[77], UBO[1], IMM[7]. >>> DP4 TEMP[78].x, TEMP[69], TEMP[74] >>> DP4 TEMP[79].x, TEMP[69], TEMP[75] >>> MOV TEMP[78].y, TEMP[79]. >>> DP4 TEMP[80].x, TEMP[69], TEMP[76] >>> MOV TEMP[78].z, TEMP[80]. >>> DP4 TEMP[81].x, TEMP[69], TEMP[77] >>> MOV TEMP[78].w, TEMP[81]. >>> >>> vs >>> >>> DP4 TEMP[63].x, TEMP[62], CONST[2][0] >>> DP4 TEMP[64].x, TEMP[62], CONST[2][1] >>> MOV TEMP[63].y, TEMP[64]. >>> DP4 TEMP[65].x, TEMP[62], CONST[2][2] >>> MOV TEMP[63].z, TEMP[65].xxx
Re: [Mesa-dev] [PATCH] llvmpipe: initialize llvmpipe->dirty with LP_NEW_SCISSOR
Looks good to me, thanks. Reviewed-by: Roland Scheidegger Albeit I'm not quite sure why it is never set here? This bug looks similar in nature to the uninitialized fb (see 8bfe451ed30918244618608871423b2a72cf9767) and I thought that was impossible to hit with gl state tracker... Roland Am 28.08.2017 um 22:20 schrieb Brian Paul: > If llvmpipe_set_scissor_states() is never called, we still need to be sure > that derived scissor/clip state is updated. As of commit 743ad599a97d09b1 > that function might not be called. > > Fixes regressed Piglit gl-1.0-scissor-offscreen -fbo -auto test. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101709 > --- > src/gallium/drivers/llvmpipe/lp_context.c | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/src/gallium/drivers/llvmpipe/lp_context.c > b/src/gallium/drivers/llvmpipe/lp_context.c > index 599a9f1..613d60f 100644 > --- a/src/gallium/drivers/llvmpipe/lp_context.c > +++ b/src/gallium/drivers/llvmpipe/lp_context.c > @@ -227,6 +227,12 @@ llvmpipe_create_context(struct pipe_screen *screen, void > *priv, > > lp_reset_counters(); > > + /* If llvmpipe_set_scissor_states() is never called, we still need to > +* make sure that derived scissor state is computed. > +* See https://bugs.freedesktop.org/show_bug.cgi?id=101709 > +*/ > + llvmpipe->dirty |= LP_NEW_SCISSOR; > + > return &llvmpipe->pipe; > > fail: > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] V2 radeonsi use STD430 packing of UBOs by default
Related IRC discussion: 00:01 < mareko> arsenm: what are the chances I can convince you to allow me to set mayLoad = 0 on s_buffer_load_dword? :) the instruction always reads from read-only memory with Mesa 00:02 < mareko> apparently, readnone doesn't get through 00:02 < arsenm> mareko: you should get the same effect by having invariant on the MMO 00:03 < mareko> arsenm: and how would I set invariant on SI.load.const? 00:04 < arsenm> mareko: we create MMOs for a few other intrinsics already, it should be the same 00:05 < mareko> if only I had time to play with LLVM 00:05 < arsenm> mareko: it looks like that is already done so it might be a more specific problem 00:05 < arsenm> that rematerializable scalar loads patch is probably OK now though 00:07 < arsenm> https://reviews.llvm.org/D11621 Marek On Wed, Aug 30, 2017 at 1:58 AM, Marek Olšák wrote: > Interesting. It may be that glsl_to_tgsi uses copy propagation to fold > those CONST loads into operands, which puts them next to their uses in LLVM. > > I guess LLVM doesn't understand that s_buffer_load_dword loads from > immutable dereferenceable memory. It would benefit from mayLoad = 0 in > this case I think. > > Marek > > On Thu, Aug 24, 2017 at 11:48 AM, Timothy Arceri > wrote: >> >> >> On 24/08/17 18:12, Nicolai Hähnle wrote: >>> >>> On 24.08.2017 09:45, Timothy Arceri wrote: On 22/08/17 22:14, Timothy Arceri wrote: > > I'm a little unsure what to do with this now. Below is my shader-db > results, the majority of negative changes are from Natural Selection > 2. > > I looked at some dumps of the worst Natural Selection 2 shaders and > it seems to just be scheduling differences causing the regressions. > > I tested with sisched but that just made things even worse. > > Obviously we should be aiming to improve the schedulare, but since > this regresses things and I have no evidence of it helping anything > it makes the case for adding it pretty weak. > > Thoughts?? > > PERCENTAGE DELTASShaders SGPRs VGPRs SpillSGPR MaxWaves > > All affected57972.92 3.05 %5.04 % -2.94 > --- > Total 722870.28 %0.34 %0.33 % -0.21 % > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > As far as I can tell this is because after this chnage we end up with large sections of consecutive loads. Any thoughts on avoid this? >>> >>> >>> Odd. Do you see the same change in TGSI? >>> >>> This is one of those things that ideally LLVM would be smart about, but >>> unfortunately it isn't really. >> >> >> Yeah I assume it's very doable since SSA makes this stuff reasonably easy to >> deal with. However I'm not really sure where to begin, or how welcome a pass >> to do this sorting would be. We have a similar pass in nir for moving >> comparisons to where they are first used. >> >> The TGSI is introduces an extra temp to store the value of the LOAD, this is >> probably what triggers the difference in LLVM. >> >> eg. >> >> LOAD TEMP[61], UBO[2], IMM[2]. >> LOAD TEMP[62], UBO[2], IMM[1]. >> LOAD TEMP[63], UBO[2], IMM[1]. >> LOAD TEMP[64], UBO[2], IMM[2]. >> DP4 TEMP[65].x, TEMP[60], TEMP[61] >> DP4 TEMP[66].x, TEMP[60], TEMP[62] >> MOV TEMP[65].y, TEMP[66]. >> DP4 TEMP[67].x, TEMP[60], TEMP[63] >> MOV TEMP[65].z, TEMP[67]. >> DP4 TEMP[68].x, TEMP[60], TEMP[64] >> MOV TEMP[69].w, TEMP[68]. >> MOV TEMP[69].xyz, TEMP[65].xyzx >> LOAD TEMP[70], UBO[1], IMM[6]. >> LOAD TEMP[71], UBO[1], IMM[6]. >> DP4 TEMP[72].x, TEMP[69], TEMP[70] >> DP4 TEMP[73].x, TEMP[69], TEMP[71] >> LOAD TEMP[74], UBO[1], IMM[6]. >> LOAD TEMP[75], UBO[1], IMM[7]. >> LOAD TEMP[76], UBO[1], IMM[7]. >> LOAD TEMP[77], UBO[1], IMM[7]. >> DP4 TEMP[78].x, TEMP[69], TEMP[74] >> DP4 TEMP[79].x, TEMP[69], TEMP[75] >> MOV TEMP[78].y, TEMP[79]. >> DP4 TEMP[80].x, TEMP[69], TEMP[76] >> MOV TEMP[78].z, TEMP[80]. >> DP4 TEMP[81].x, TEMP[69], TEMP[77] >> MOV TEMP[78].w, TEMP[81]. >> >> vs >> >> DP4 TEMP[63].x, TEMP[62], CONST[2][0] >> DP4 TEMP[64].x, TEMP[62], CONST[2][1] >> MOV TEMP[63].y, TEMP[64]. >> DP4 TEMP[65].x, TEMP[62], CONST[2][2] >> MOV TEMP[63].z, TEMP[65]. >> DP4 TEMP[66].x, TEMP[62], CONST[2][3] >> MOV TEMP[67].w, TEMP[66]. >> MOV TEMP[67].xyz, TEMP[63].xyzx >> DP4 TEMP[68].x, TEMP[67], CONST[1][14] >> DP4 TEMP[69].x, TEMP[67], CONST[1][15] >> DP4 TEMP[70].x, TEMP[67], CONST[1][8] >> DP4 TEMP[71].x, TEMP[67], CONST[1][9] >> MOV TEMP[70].y, TEMP[71]. >> DP4 TEMP[72].x, TEMP[67], CONST[1][10] >> MOV TEMP[70].z, TEMP[72]. >> DP4 TEMP[73].
Re: [Mesa-dev] V2 radeonsi use STD430 packing of UBOs by default
Interesting. It may be that glsl_to_tgsi uses copy propagation to fold those CONST loads into operands, which puts them next to their uses in LLVM. I guess LLVM doesn't understand that s_buffer_load_dword loads from immutable dereferenceable memory. It would benefit from mayLoad = 0 in this case I think. Marek On Thu, Aug 24, 2017 at 11:48 AM, Timothy Arceri wrote: > > > On 24/08/17 18:12, Nicolai Hähnle wrote: >> >> On 24.08.2017 09:45, Timothy Arceri wrote: >>> >>> >>> >>> On 22/08/17 22:14, Timothy Arceri wrote: I'm a little unsure what to do with this now. Below is my shader-db results, the majority of negative changes are from Natural Selection 2. I looked at some dumps of the worst Natural Selection 2 shaders and it seems to just be scheduling differences causing the regressions. I tested with sisched but that just made things even worse. Obviously we should be aiming to improve the schedulare, but since this regresses things and I have no evidence of it helping anything it makes the case for adding it pretty weak. Thoughts?? PERCENTAGE DELTASShaders SGPRs VGPRs SpillSGPR MaxWaves All affected57972.92 3.05 %5.04 % -2.94 --- Total 722870.28 %0.34 %0.33 % -0.21 % ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev >>> >>> >>> As far as I can tell this is because after this chnage we end up with >>> large sections of consecutive loads. Any thoughts on avoid this? >> >> >> Odd. Do you see the same change in TGSI? >> >> This is one of those things that ideally LLVM would be smart about, but >> unfortunately it isn't really. > > > Yeah I assume it's very doable since SSA makes this stuff reasonably easy to > deal with. However I'm not really sure where to begin, or how welcome a pass > to do this sorting would be. We have a similar pass in nir for moving > comparisons to where they are first used. > > The TGSI is introduces an extra temp to store the value of the LOAD, this is > probably what triggers the difference in LLVM. > > eg. > > LOAD TEMP[61], UBO[2], IMM[2]. > LOAD TEMP[62], UBO[2], IMM[1]. > LOAD TEMP[63], UBO[2], IMM[1]. > LOAD TEMP[64], UBO[2], IMM[2]. > DP4 TEMP[65].x, TEMP[60], TEMP[61] > DP4 TEMP[66].x, TEMP[60], TEMP[62] > MOV TEMP[65].y, TEMP[66]. > DP4 TEMP[67].x, TEMP[60], TEMP[63] > MOV TEMP[65].z, TEMP[67]. > DP4 TEMP[68].x, TEMP[60], TEMP[64] > MOV TEMP[69].w, TEMP[68]. > MOV TEMP[69].xyz, TEMP[65].xyzx > LOAD TEMP[70], UBO[1], IMM[6]. > LOAD TEMP[71], UBO[1], IMM[6]. > DP4 TEMP[72].x, TEMP[69], TEMP[70] > DP4 TEMP[73].x, TEMP[69], TEMP[71] > LOAD TEMP[74], UBO[1], IMM[6]. > LOAD TEMP[75], UBO[1], IMM[7]. > LOAD TEMP[76], UBO[1], IMM[7]. > LOAD TEMP[77], UBO[1], IMM[7]. > DP4 TEMP[78].x, TEMP[69], TEMP[74] > DP4 TEMP[79].x, TEMP[69], TEMP[75] > MOV TEMP[78].y, TEMP[79]. > DP4 TEMP[80].x, TEMP[69], TEMP[76] > MOV TEMP[78].z, TEMP[80]. > DP4 TEMP[81].x, TEMP[69], TEMP[77] > MOV TEMP[78].w, TEMP[81]. > > vs > > DP4 TEMP[63].x, TEMP[62], CONST[2][0] > DP4 TEMP[64].x, TEMP[62], CONST[2][1] > MOV TEMP[63].y, TEMP[64]. > DP4 TEMP[65].x, TEMP[62], CONST[2][2] > MOV TEMP[63].z, TEMP[65]. > DP4 TEMP[66].x, TEMP[62], CONST[2][3] > MOV TEMP[67].w, TEMP[66]. > MOV TEMP[67].xyz, TEMP[63].xyzx > DP4 TEMP[68].x, TEMP[67], CONST[1][14] > DP4 TEMP[69].x, TEMP[67], CONST[1][15] > DP4 TEMP[70].x, TEMP[67], CONST[1][8] > DP4 TEMP[71].x, TEMP[67], CONST[1][9] > MOV TEMP[70].y, TEMP[71]. > DP4 TEMP[72].x, TEMP[67], CONST[1][10] > MOV TEMP[70].z, TEMP[72]. > DP4 TEMP[73].x, TEMP[67], CONST[1][11] > MOV TEMP[70].w, TEMP[73]. > MOV TEMP[74].xyw, TEMP[70].xyxw > >> >> Cheers, >> Nicolai >> > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/7] radeonsi: make use of LOAD for UBOs
On 30/08/17 09:39, Marek Olšák wrote: Did you check shader-db for differences in code generation? Yes, for more details and discussion please see the cover letter. Thanks, Tim Marek On Tue, Aug 22, 2017 at 2:14 PM, Timothy Arceri wrote: v2: always set can_speculate and allow_smem to true --- src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 31 +++ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index f8c99ff7e7..83cd8cd938 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -66,32 +66,36 @@ static LLVMValueRef get_buffer_size( LLVMConstInt(ctx->i32, 0x3FFF, 0), ""); size = LLVMBuildUDiv(builder, size, stride, ""); } return size; } static LLVMValueRef shader_buffer_fetch_rsrc(struct si_shader_context *ctx, -const struct tgsi_full_src_register *reg) +const struct tgsi_full_src_register *reg, +bool ubo) { LLVMValueRef index; if (!reg->Register.Indirect) { index = LLVMConstInt(ctx->i32, reg->Register.Index, false); } else { index = si_get_indirect_index(ctx, ®->Indirect, reg->Register.Index); } - return ctx->abi.load_ssbo(&ctx->abi, index, false); + if (ubo) + return ctx->abi.load_ubo(&ctx->abi, index); + else + return ctx->abi.load_ssbo(&ctx->abi, index, false); } static bool tgsi_is_array_sampler(unsigned target) { return target == TGSI_TEXTURE_1D_ARRAY || target == TGSI_TEXTURE_SHADOW1D_ARRAY || target == TGSI_TEXTURE_2D_ARRAY || target == TGSI_TEXTURE_SHADOW2D_ARRAY || target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY || @@ -356,26 +360,28 @@ static void load_fetch_args( struct lp_build_emit_data * emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = &ctx->gallivm; const struct tgsi_full_instruction * inst = emit_data->inst; unsigned target = inst->Memory.Texture; LLVMValueRef rsrc; emit_data->dst_type = ctx->v4f32; - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || + inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef offset; LLVMValueRef tmp; - rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]); + bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF; + rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo); tmp = lp_build_emit_fetch(bld_base, inst, 1, 0); offset = LLVMBuildBitCast(builder, tmp, ctx->i32, ""); buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0, offset, false, false); } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE || tgsi_is_bindless_image_file(inst->Src[0].Register.File)) { LLVMValueRef coords; @@ -407,21 +413,21 @@ static unsigned get_load_intr_attribs(bool can_speculate) static unsigned get_store_intr_attribs(bool writeonly_memory) { return writeonly_memory && HAVE_LLVM >= 0x0400 ? LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY : LP_FUNC_ATTR_WRITEONLY; } static void load_emit_buffer(struct si_shader_context *ctx, struct lp_build_emit_data *emit_data, -bool can_speculate) +bool can_speculate, bool allow_smem) { const struct tgsi_full_instruction *inst = emit_data->inst; uint writemask = inst->Dst[0].Register.WriteMask; uint count = util_last_bit(writemask); LLVMValueRef *args = emit_data->args; /* Don't use SMEM for shader buffer loads, because LLVM doesn't * select SMEM for SI.load.const with a non-constant offset, and * constant offsets practically don't exist with shader buffers. * @@ -434,21 +440,21 @@ static void load_emit_buffer(struct si_shader_context *ctx, * After that, si_memory_barrier should invalidate sL1 for shader * buffers. */ assert(LLVMConstIntGetZExtValue(args[1]) == 0); /* vindex */ emit_data->output[emit_data->chan] = ac_build_buffer_load(&ctx->ac, args[0], count, NULL,
Re: [Mesa-dev] [PATCH 7/7] radeonsi: enable STD430 packing of UBOs by default
On Tue, Aug 22, 2017 at 2:14 PM, Timothy Arceri wrote: > Before this change we were defaulting to STD140 which is slightly > less efficient at packing arrays. > --- > src/gallium/drivers/radeonsi/si_pipe.c | 2 +- > src/mesa/state_tracker/st_context.c| 3 +++ > 2 files changed, 4 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index 1d1db02c76..b41a6f5ce7 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -522,20 +522,21 @@ static int si_get_param(struct pipe_screen* pscreen, > enum pipe_cap param) > case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: > case PIPE_CAP_DOUBLES: > case PIPE_CAP_TGSI_TEX_TXF_LZ: > case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: > case PIPE_CAP_BINDLESS_TEXTURE: > case PIPE_CAP_QUERY_TIMESTAMP: > case PIPE_CAP_QUERY_TIME_ELAPSED: > case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: > case PIPE_CAP_QUERY_SO_OVERFLOW: > case PIPE_CAP_MEMOBJ: > + case PIPE_CAP_LOAD_CONSTBUF: > return 1; > > case PIPE_CAP_INT64: > case PIPE_CAP_INT64_DIVMOD: > case PIPE_CAP_TGSI_CLOCK: > case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: > case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: > return 1; > > case PIPE_CAP_TGSI_VOTE: > @@ -611,21 +612,20 @@ static int si_get_param(struct pipe_screen* pscreen, > enum pipe_cap param) > case PIPE_CAP_TEXTURE_GATHER_OFFSETS: > case PIPE_CAP_VERTEXID_NOBASE: > case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: > case PIPE_CAP_MAX_WINDOW_RECTANGLES: > case PIPE_CAP_NATIVE_FENCE_FD: > case PIPE_CAP_TGSI_FS_FBFETCH: > case PIPE_CAP_TGSI_MUL_ZERO_WINS: > case PIPE_CAP_UMA: > case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: > case PIPE_CAP_POST_DEPTH_COVERAGE: > - case PIPE_CAP_LOAD_CONSTBUF: > return 0; > > case PIPE_CAP_QUERY_BUFFER_OBJECT: > return si_have_tgsi_compute(sscreen); > > case PIPE_CAP_DRAW_PARAMETERS: > case PIPE_CAP_MULTI_DRAW_INDIRECT: > case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: > return sscreen->has_draw_indirect_multi; > > diff --git a/src/mesa/state_tracker/st_context.c > b/src/mesa/state_tracker/st_context.c > index ef2e73e741..bd990fc34a 100644 > --- a/src/mesa/state_tracker/st_context.c > +++ b/src/mesa/state_tracker/st_context.c > @@ -367,20 +367,23 @@ st_create_context_priv( struct gl_context *ctx, struct > pipe_context *pipe, > > /* Need these flags: > */ > ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; > > ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; > > if (no_error) >ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR; > > + ctx->Const.UseSTD430AsDefaultPacking = > + screen->get_param(screen, PIPE_CAP_LOAD_CONSTBUF); > + This st/mesa change should be a separate patch. Also, ctx->Const is typically initialized in st_extensions.c and CAPs are also checked mostly there. If you split this patch into two, you can add my Rb to both IF you initialize ctx->Const... in st_extensions.c. Other than that, for patches 1-3, 5-6: Reviewed-by: Marek Olšák I commented on patch 4. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/7] radeonsi: make use of LOAD for UBOs
Did you check shader-db for differences in code generation? Marek On Tue, Aug 22, 2017 at 2:14 PM, Timothy Arceri wrote: > v2: always set can_speculate and allow_smem to true > --- > src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 31 > +++ > 1 file changed, 21 insertions(+), 10 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > index f8c99ff7e7..83cd8cd938 100644 > --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > @@ -66,32 +66,36 @@ static LLVMValueRef get_buffer_size( > LLVMConstInt(ctx->i32, 0x3FFF, 0), ""); > > size = LLVMBuildUDiv(builder, size, stride, ""); > } > > return size; > } > > static LLVMValueRef > shader_buffer_fetch_rsrc(struct si_shader_context *ctx, > -const struct tgsi_full_src_register *reg) > +const struct tgsi_full_src_register *reg, > +bool ubo) > { > LLVMValueRef index; > > if (!reg->Register.Indirect) { > index = LLVMConstInt(ctx->i32, reg->Register.Index, false); > } else { > index = si_get_indirect_index(ctx, ®->Indirect, > reg->Register.Index); > } > > - return ctx->abi.load_ssbo(&ctx->abi, index, false); > + if (ubo) > + return ctx->abi.load_ubo(&ctx->abi, index); > + else > + return ctx->abi.load_ssbo(&ctx->abi, index, false); > } > > static bool tgsi_is_array_sampler(unsigned target) > { > return target == TGSI_TEXTURE_1D_ARRAY || >target == TGSI_TEXTURE_SHADOW1D_ARRAY || >target == TGSI_TEXTURE_2D_ARRAY || >target == TGSI_TEXTURE_SHADOW2D_ARRAY || >target == TGSI_TEXTURE_CUBE_ARRAY || >target == TGSI_TEXTURE_SHADOWCUBE_ARRAY || > @@ -356,26 +360,28 @@ static void load_fetch_args( > struct lp_build_emit_data * emit_data) > { > struct si_shader_context *ctx = si_shader_context(bld_base); > struct gallivm_state *gallivm = &ctx->gallivm; > const struct tgsi_full_instruction * inst = emit_data->inst; > unsigned target = inst->Memory.Texture; > LLVMValueRef rsrc; > > emit_data->dst_type = ctx->v4f32; > > - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { > + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || > + inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) { > LLVMBuilderRef builder = gallivm->builder; > LLVMValueRef offset; > LLVMValueRef tmp; > > - rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]); > + bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF; > + rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo); > > tmp = lp_build_emit_fetch(bld_base, inst, 1, 0); > offset = LLVMBuildBitCast(builder, tmp, ctx->i32, ""); > > buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0, >offset, false, false); > } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE || >tgsi_is_bindless_image_file(inst->Src[0].Register.File)) { > LLVMValueRef coords; > > @@ -407,21 +413,21 @@ static unsigned get_load_intr_attribs(bool > can_speculate) > > static unsigned get_store_intr_attribs(bool writeonly_memory) > { > return writeonly_memory && HAVE_LLVM >= 0x0400 ? > LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY : > LP_FUNC_ATTR_WRITEONLY; > } > > static void load_emit_buffer(struct si_shader_context *ctx, > struct lp_build_emit_data *emit_data, > -bool can_speculate) > +bool can_speculate, bool allow_smem) > { > const struct tgsi_full_instruction *inst = emit_data->inst; > uint writemask = inst->Dst[0].Register.WriteMask; > uint count = util_last_bit(writemask); > LLVMValueRef *args = emit_data->args; > > /* Don't use SMEM for shader buffer loads, because LLVM doesn't > * select SMEM for SI.load.const with a non-constant offset, and > * constant offsets practically don't exist with shader buffers. > * > @@ -434,21 +440,21 @@ static void load_emit_buffer(struct si_shader_context > *ctx, > * After that, si_memory_barrier should invalidate sL1 for > shader > * buffers. > */ > > assert(LLVMConstIntGetZExtValue(args[1]) == 0); /* vindex */ > emit_data->output[emit_data->chan] = > ac_build_buffer_load(&ctx->ac, args[0
Re: [Mesa-dev] [PATCH 5/8] glcpp: Avoid unnecessary call to strlen
On 30/08/17 05:56, Thomas Helland wrote: Length of the token was already calculated by flex and stored in yyleng, no need to implicitly call strlen() via linear_strdup(). --- src/compiler/glsl/glcpp/glcpp-lex.l | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l b/src/compiler/glsl/glcpp/glcpp-lex.l index 381b97364a..93e5cb3845 100644 --- a/src/compiler/glsl/glcpp/glcpp-lex.l +++ b/src/compiler/glsl/glcpp/glcpp-lex.l @@ -101,7 +101,8 @@ void glcpp_set_column (int column_no , yyscan_t yyscanner); #define RETURN_STRING_TOKEN(token)\ do {\ if (! parser->skipping) {\ - yylval->str = linear_strdup(yyextra->linalloc, yytext); \ + yylval->str = linear_alloc_child(yyextra->linalloc, yyleng + 1); \ + memcpy(yylval->str, yytext, yyleng + 1);\ RETURN_TOKEN_NEVER_SKIP (token);\ } \ } while(0) Copy and pasted comments from the last time this was sent: Seems reasonable. This pattern is also happening in glsl_lexer.ll maybe we should do this there also? It might also make sense to add a comment here to say why we don't use linear_strdup(), so the optimisation doesn't get removed in future. With that: Reviewed-by: Timothy Arceri ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/5] intel/isl: Set MOCS based on view usage
On August 29, 2017 3:00:51 PM Kenneth Graunke wrote: On Tuesday, August 1, 2017 3:48:29 PM PDT Jason Ekstrand wrote: This little series changes things around so that, instead of passing MOCS values into ISL, ISL knows how to set them itself. This allows us to centralize some of the decisions about how MOCS gets set for surfaces and hopefully, if we ever do anything crazy in the future, we can share it between GL and Vulkan. Unfortunately, surfaces are not the only places where MOCS is used. It also shows up in vertex buffers, index buffers, and streamout buffers. However those are always set to the platform equivalent of I915_MOCS_CACHED (and that's not all that liable to change) so they're not particularly interesting. If people like this approach, I'd like to Cc it to stable for 17.2 because it has the side-effect of making Vulkan MOCS a bit more sane. Jason Ekstrand (5): intel/isl: Set MOCS based on usage for surface states intel/blorp: Delete the MOCS plumbing i965: Stop passing MOCS information into ISL anv: Stop passing MOCS information into ISL intel/isl: Get rid of the mocs fields in fill/emit_info src/intel/blorp/blorp.h | 6 --- src/intel/blorp/blorp_genX_exec.h| 37 +++-- src/intel/isl/isl.h | 22 -- src/intel/isl/isl_emit_depth_stencil.c | 12 +++--- src/intel/isl/isl_genX_mocs.h| 53 src/intel/isl/isl_surface_state.c| 9 ++-- src/intel/vulkan/anv_blorp.c | 3 -- src/intel/vulkan/anv_device.c| 1 - src/intel/vulkan/anv_image.c | 12 ++ src/intel/vulkan/anv_private.h | 2 - src/intel/vulkan/genX_cmd_buffer.c | 13 ++ src/intel/vulkan/genX_state.c| 3 -- src/mesa/drivers/dri/i965/brw_blorp.c| 15 --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 26 ++-- 14 files changed, 101 insertions(+), 113 deletions(-) create mode 100644 src/intel/isl/isl_genX_mocs.h I guess this is okay. I'm not crazy about it, mostly because the logic is still spread out in a bunch of places. If there was a vertex buffer usage bit and we could handle that there, I think that would be nice, even if it doesn't otherwise seem relevant to ISL. *shrug* Yeah, vertex buffers make this a bit awkward. I wish there were a better way than scattering things around more. I'm honestly not sure if this series improves the scattering or not. Having ISL do this itself rather than passing in values makes sense. Reviewed-by: Kenneth Graunke ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radv: Disable multilayer & multilevel DCC.
The current DCC init routine doesn't account for initializing a single layer or level. Multilayer seems hard for small textures on pre-GFX9 as tre metadata for the layers can be interleaved. For GFX9 multilevel textures are a problem for similar reasons. So just disable this for now, until we handle the texture modes correctly. Fixes: f4e499ec791 "radv: add initial non-conformant radv vulkan driver" --- src/amd/vulkan/radv_image.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index eb8ceef1db1..4b1bbefeecb 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -118,6 +118,7 @@ radv_init_surface(struct radv_device *device, VK_IMAGE_USAGE_STORAGE_BIT)) || (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) || (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) || +pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 || device->physical_device->rad_info.chip_class < VI || create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) || !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable)) -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/6] i965: Use BLORP for buffer object stall avoidance blits instead of BLT.
On August 29, 2017 2:29:25 PM Kenneth Graunke wrote: Improves performance of GFXBench4 tests at 1024x768 on a Kabylake GT2: - Manhattan 3.1 by 1.32134% +/- 0.322734% (n=8). - Car Chase by 1.25607% +/- 0.291262% (n=5). Woohoo! That's not bad. Apart from my little niggle on patch 1, everything looks good to me. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/intel_buffer_objects.c | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 01443c2b7fc..49e68bd7392 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -37,7 +37,7 @@ #include "x86/common_x86_asm.h" #include "brw_context.h" -#include "intel_blit.h" +#include "brw_blorp.h" #include "intel_buffer_objects.h" #include "intel_batchbuffer.h" #include "intel_tiled_memcpy.h" @@ -294,9 +294,9 @@ brw_buffer_subdata(struct gl_context *ctx, brw_bo_subdata(temp_bo, 0, size, data); - intel_emit_linear_blit(brw, -intel_obj->buffer, offset, + brw_blorp_copy_buffers(brw, temp_bo, 0, +intel_obj->buffer, offset, size); brw_emit_mi_flush(brw); @@ -533,11 +533,11 @@ brw_flush_mapped_buffer_range(struct gl_context *ctx, * another blit of that area and the complete newer data will land the * second time. */ - intel_emit_linear_blit(brw, - intel_obj->buffer, - obj->Mappings[index].Offset + offset, + brw_blorp_copy_buffers(brw, intel_obj->range_map_bo[index], intel_obj->map_extra[index] + offset, + intel_obj->buffer, + obj->Mappings[index].Offset + offset, length); mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset + offset, @@ -565,10 +565,10 @@ brw_unmap_buffer(struct gl_context *ctx, brw_bo_unmap(intel_obj->range_map_bo[index]); if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) { - intel_emit_linear_blit(brw, -intel_obj->buffer, obj->Mappings[index].Offset, + brw_blorp_copy_buffers(brw, intel_obj->range_map_bo[index], intel_obj->map_extra[index], +intel_obj->buffer, obj->Mappings[index].Offset, obj->Mappings[index].Length); mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset, obj->Mappings[index].Length); @@ -646,9 +646,9 @@ brw_copy_buffer_subdata(struct gl_context *ctx, dst_bo = intel_bufferobj_buffer(brw, intel_dst, write_offset, size, true); src_bo = intel_bufferobj_buffer(brw, intel_src, read_offset, size, false); - intel_emit_linear_blit(brw, - dst_bo, write_offset, - src_bo, read_offset, size); + brw_blorp_copy_buffers(brw, + src_bo, read_offset, + dst_bo, write_offset, size); /* Since we've emitted some blits to buffers that will (likely) be used * in rendering operations in other cache domains in this batch, emit a -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy() helper.
On August 29, 2017 3:14:08 PM Chris Wilson wrote: Quoting Kenneth Graunke (2017-08-29 22:28:38) Anvil already had code to copy between two buffer objects in the most efficient way possible, using large bpp copies, then smaller bpp copies. This patch moves that logic into BLORP as blorp_buffer_copy(), so we can reuse it in i965 as well. Just some food for thought... The render access pattern is terrible for wide linear buffers, as the pixel block ends up being scattered across many pages, thrashing the TLB. But if instead of telling it to copy as wide as possible, you just limited it to a stride of 4096/4 (if memory serves 4 is the largest pixel block height) then each pixel block is located inside just one page (and hopefully cluster across the eu, further reducing the stride may help if the spread ends up taller than wider). Yes, some benchmarking would have to be done to truly substantiate Ken's claim. I think you may be right that the access patterns aren't great. If we hit the case where you do a one pixel tall blit, that's also not good. I think there is a lot of time that could be sink into making this more efficient. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy() helper.
On August 29, 2017 2:29:26 PM Kenneth Graunke wrote: Anvil already had code to copy between two buffer objects in the most efficient way possible, using large bpp copies, then smaller bpp copies. This patch moves that logic into BLORP as blorp_buffer_copy(), so we can reuse it in i965 as well. --- src/intel/blorp/blorp.h | 6 ++ src/intel/blorp/blorp_blit.c | 128 +++ src/intel/vulkan/anv_blorp.c | 104 +++ 3 files changed, 141 insertions(+), 97 deletions(-) diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h index 4d9a44b0926..951a97f5daa 100644 --- a/src/intel/blorp/blorp.h +++ b/src/intel/blorp/blorp.h @@ -132,6 +132,12 @@ blorp_copy(struct blorp_batch *batch, uint32_t dst_x, uint32_t dst_y, uint32_t src_width, uint32_t src_height); +void +blorp_buffer_copy(struct blorp_batch *batch, + void *src_bo, uint64_t src_offset, + void *dst_bo, uint64_t dst_offset, + uint64_t size); + void blorp_fast_clear(struct blorp_batch *batch, const struct blorp_surf *surf, enum isl_format format, diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index 35008cbbb0a..998fd9b6d39 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -2513,3 +2513,131 @@ blorp_copy(struct blorp_batch *batch, do_blorp_blit(batch, ¶ms, &wm_prog_key, &coords); } + +static enum isl_format +isl_format_for_size(unsigned size_B) +{ + switch (size_B) { + case 1: return ISL_FORMAT_R8_UINT; + case 2: return ISL_FORMAT_R8G8_UINT; + case 4: return ISL_FORMAT_R8G8B8A8_UINT; + case 8: return ISL_FORMAT_R16G16B16A16_UINT; + case 16: return ISL_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Not a power-of-two format size"); + } +} + +/** + * Returns the greatest common divisor of a and b that is a power of two. + */ +static uint64_t +gcd_pow2_u64(uint64_t a, uint64_t b) +{ + assert(a > 0 || b > 0); + + unsigned a_log2 = ffsll(a) - 1; + unsigned b_log2 = ffsll(b) - 1; + + /* If either a or b is 0, then a_log2 or b_log2 till be UINT_MAX in which +* case, the MIN2() will take the other one. If both are 0 then we will +* hit the assert above. +*/ + return 1 << MIN2(a_log2, b_log2); +} + +static void +do_buffer_copy(struct blorp_batch *batch, + void *src_bo, uint64_t src_offset, + void *dst_bo, uint64_t dst_offset, + int width, int height, int block_size) +{ + /* The actual format we pick doesn't matter as blorp will throw it away. +* The only thing that actually matters is the size. +*/ + enum isl_format format = isl_format_for_size(block_size); + + UNUSED bool ok; + struct isl_surf surf; + ok = isl_surf_init(batch->blorp->isl_dev, &surf, + .dim = ISL_SURF_DIM_2D, + .format = format, + .width = width, + .height = height, + .depth = 1, + .levels = 1, + .array_len = 1, + .samples = 1, + .row_pitch = width * block_size, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_RENDER_TARGET_BIT, + .tiling_flags = ISL_TILING_LINEAR_BIT); + assert(ok); + + struct blorp_surf src_blorp_surf = { + .surf = &surf, + .addr = { + .buffer = src_bo, + .offset = src_offset, + }, + }; + + struct blorp_surf dst_blorp_surf = { + .surf = &surf, + .addr = { + .buffer = dst_bo, + .offset = dst_offset, + }, + }; + + blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0, + 0, 0, 0, 0, width, height); +} + +/* This is maximum possible width/height our HW can handle */ +#define MAX_SURFACE_DIM (1ull << 14) + +void +blorp_buffer_copy(struct blorp_batch *batch, + void *src_bo, uint64_t src_offset, + void *dst_bo, uint64_t dst_offset, I think I have a mild preference for making this take a pair of blorp_address structs instead of void pointers and offsets. Of course, if that's a pain for some reason, this is fine. + uint64_t size) +{ + uint64_t copy_size = size; + + /* First, we compute the biggest format that can be used with the +* given offsets and size. +*/ + int bs = 16; + bs = gcd_pow2_u64(bs, src_offset); + bs = gcd_pow2_u64(bs, dst_offset); + bs = gcd_pow2_u64(bs, size); + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(batch, src_bo, src_offset, dst_bo, dst_offset, + MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs); + copy_size -= max_copy_size; +
[Mesa-dev] [PATCH 2/2] radeonsi: eliminate PS color outputs when colormask kills them
From: Marek Olšák --- src/gallium/drivers/radeonsi/si_state.c | 4 src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_shaders.c | 1 + 3 files changed, 6 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 41b08f8..5ee8bb9 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -434,20 +434,22 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) | S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2)); if (state->alpha_to_coverage) blend->need_src_alpha_4bit |= 0xf; blend->cb_target_mask = 0; + blend->cb_target_enabled_4bit = 0; + for (int i = 0; i < 8; i++) { /* state->rt entries > 0 only written if independent blending */ const int j = state->independent_blend_enable ? i : 0; unsigned eqRGB = state->rt[j].rgb_func; unsigned srcRGB = state->rt[j].rgb_src_factor; unsigned dstRGB = state->rt[j].rgb_dst_factor; unsigned eqA = state->rt[j].alpha_func; unsigned srcA = state->rt[j].alpha_src_factor; unsigned dstA = state->rt[j].alpha_dst_factor; @@ -475,20 +477,22 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, if (blend->dual_src_blend && (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { assert(!"Unsupported equation for dual source blending"); si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; } /* cb_render_state will disable unused ones */ blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); + if (state->rt[j].colormask) + blend->cb_target_enabled_4bit |= 0xf << (4 * i); if (!state->rt[j].colormask || !state->rt[j].blend_enable) { si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; } /* Blending optimizations for RB+. * These transformations don't change the behavior. * * First, get rid of DST in the blend factors: diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 26c7b4c..7b7d96c 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -48,20 +48,21 @@ struct si_shader_selector; struct si_state_blend { struct si_pm4_state pm4; uint32_tcb_target_mask; boolalpha_to_coverage; boolalpha_to_one; booldual_src_blend; /* Set 0xf or 0x0 (4 bits) per render target if the following is * true. ANDed with spi_shader_col_format. */ + unsignedcb_target_enabled_4bit; unsignedblend_enable_4bit; unsignedneed_src_alpha_4bit; }; struct si_state_rasterizer { struct si_pm4_state pm4; /* poly offset states for 16-bit, 24-bit, and 32-bit zbuffers */ struct si_pm4_state *pm4_poly_offset; unsignedpa_sc_line_stipple; unsignedpa_cl_clip_cntl; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 71d7987..061b3d2 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1348,20 +1348,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, */ key->part.ps.epilog.spi_shader_col_format = (blend->blend_enable_4bit & blend->need_src_alpha_4bit & sctx->framebuffer.spi_shader_col_format_blend_alpha) | (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & sctx->framebuffer.spi_shader_col_format_blend) | (~blend->blend_enable_4bit & blend->need_src_alpha_4bit & sctx->framebuffer.spi_shader_col_format_alpha) | (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & sctx->
[Mesa-dev] [PATCH 1/2] gallium/radeon: sort DBG shader flags according to pipe_shader_type
From: Marek Olšák --- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/radeon/r600_pipe_common.c | 20 ++-- src/gallium/drivers/radeon/r600_pipe_common.h | 25 - src/gallium/drivers/radeonsi/si_pipe.c| 5 ++--- 4 files changed, 17 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 023f1b4..9844add 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -659,21 +659,21 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws, if (rscreen->b.info.chip_class >= EVERGREEN) { rscreen->b.b.is_format_supported = evergreen_is_format_supported; } else { rscreen->b.b.is_format_supported = r600_is_format_supported; } rscreen->b.debug_flags |= debug_get_flags_option("R600_DEBUG", r600_debug_options, 0); if (debug_get_bool_option("R600_DEBUG_COMPUTE", FALSE)) rscreen->b.debug_flags |= DBG_COMPUTE; if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) - rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS | DBG_TCS | DBG_TES; + rscreen->b.debug_flags |= DBG_ALL_SHADERS | DBG_FS; if (!debug_get_bool_option("R600_HYPERZ", TRUE)) rscreen->b.debug_flags |= DBG_NO_HYPERZ; if (rscreen->b.family == CHIP_UNKNOWN) { fprintf(stderr, "r600: Unknown chipset 0x%04X\n", rscreen->b.info.pci_id); FREE(rscreen); return NULL; } /* Figure out streamout kernel support. */ diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 7c12565..6b61901 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -892,22 +892,21 @@ static const char *r600_get_family_name(const struct r600_common_screen *rscreen case CHIP_STONEY: return "AMD STONEY"; case CHIP_VEGA10: return "AMD VEGA10"; case CHIP_RAVEN: return "AMD RAVEN"; default: return "AMD unknown"; } } static void r600_disk_cache_create(struct r600_common_screen *rscreen) { /* Don't use the cache if shader dumping is enabled. */ - if (rscreen->debug_flags & - (DBG_FS | DBG_VS | DBG_TCS | DBG_TES | DBG_GS | DBG_PS | DBG_CS)) + if (rscreen->debug_flags & DBG_ALL_SHADERS) return; uint32_t mesa_timestamp; if (disk_cache_get_function_timestamp(r600_disk_cache_create, &mesa_timestamp)) { char *timestamp_str; int res = -1; if (rscreen->chip_class < SI) { res = asprintf(×tamp_str, "%u",mesa_timestamp); } @@ -1528,36 +1527,21 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen) slab_destroy_parent(&rscreen->pool_transfers); disk_cache_destroy(rscreen->disk_shader_cache); rscreen->ws->destroy(rscreen->ws); FREE(rscreen); } bool r600_can_dump_shader(struct r600_common_screen *rscreen, unsigned processor) { - switch (processor) { - case PIPE_SHADER_VERTEX: - return (rscreen->debug_flags & DBG_VS) != 0; - case PIPE_SHADER_TESS_CTRL: - return (rscreen->debug_flags & DBG_TCS) != 0; - case PIPE_SHADER_TESS_EVAL: - return (rscreen->debug_flags & DBG_TES) != 0; - case PIPE_SHADER_GEOMETRY: - return (rscreen->debug_flags & DBG_GS) != 0; - case PIPE_SHADER_FRAGMENT: - return (rscreen->debug_flags & DBG_PS) != 0; - case PIPE_SHADER_COMPUTE: - return (rscreen->debug_flags & DBG_CS) != 0; - default: - return false; - } + return rscreen->debug_flags && (1 << processor); } bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor) { return (rscreen->debug_flags & DBG_CHECK_IR) || r600_can_dump_shader(rscreen, processor); } void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 59886ec..b0cd502 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -62,34 +62,33 @@ struct u_log_context; #define R600_CONTEXT_START_PIPELINE_STATS (1u << 1) #define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2) #define R600_CONTEXT_PRIVATE_FLAG (1u << 3) /* special primitive types */ #define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type
I'd like CONSTBUF to stay. I think CONST is already too overloaded. If we start treating CONST[0] and CONST[0][0] differently, things are just going to get more ugly. If the second dimension is unspecified, it's implied to be 0. TGSI doesn't allow specifying the second dimension index and omit the first dimenstion, which means it can't say that CONST[0] is constbuf 0 without changing existing conventions. Marek On Wed, Aug 23, 2017 at 5:15 PM, Roland Scheidegger wrote: > Am 23.08.2017 um 17:03 schrieb Nicolai Hähnle: >> On 23.08.2017 16:03, Roland Scheidegger wrote: >>> Am 23.08.2017 um 15:49 schrieb Ilia Mirkin: On Wed, Aug 23, 2017 at 9:20 AM, Nicolai Hähnle wrote: > On 22.08.2017 16:56, Ilia Mirkin wrote: >> >> On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger >> >> wrote: >>> >>> I am probably missing something here, but why do you need a new >>> register >>> file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, >>> can't >>> you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same >>> thing? >>> Or do you need to know how it's going to be accessed in advance? >> >> >> With bindless, LOAD can take a CONST I believe [which contains the >> value of the bindless id]. I think it's nice to keep those concepts >> separate... having CONST sometimes mean the value and other times mean >> the address is a bit weird. This way CONSTBUF[0] is the address of the >> 0th constbuf. > > > I'm still not quite convinced. The levels of indirection should > clarify the > meaning, shouldn't they? > > You get > >LOAD dst, CONST[0][0], IMM[0] > > when loading from offset IMM[0] of a bindless buffer whose handle is > at the > beginning of the buffer CONST[0]. > > You get > >LOAD dst, CONST[0], IMM[0] > > when loading from offset IMM[0] of non-bindless buffer 0. > > Is there ever really a situation where the two could be confused? I always considered CONST[0] == CONST[0][0]. Technically they're not, since once has the second dimension in the TGSI encoding while the other doesn't. But practically, MOV TEMP[0], CONST[0] and MOV TEMP[0], CONST[0][0] are in every way identical. Currently st/mesa will just use CONST[0] everywhere, never adding the 2nd dimension. >>> Maybe it would be worth the effort to fix this? >> >> Would be nice. One thing that makes this a bit awkward is that older >> drivers just don't support two-dimensional CONST at all -- see >> PIPE_SHADER_CAP_MAX_CONST_BUFFERS. Giving them a shader that loads >> CONST[0][n] is going to fail. > I suppose it wouldn't be too difficult to make them just accept this > (basically ignoring the buffer index). > But anyway, I don't know if it's worth the hassle, I just brought it up > because if it's a problem going forward, it should be possible to change > it. (Albeit we definitely have code relying on these 1d constants too...) > > Roland > > >> >> Basically, changing this is a backward-compatible change to state >> trackers, which would have to promise not to produce one-dimensional >> CONST for the usual, vec4-based constant fetching. >> >> On the other hand, maybe we're over-complicating this. The only >> instruction that is really affected is LOAD. And for LOAD, there >> shouldn't be a compatibility problem. Hmm... >> >> Cheers, >> Nicolai >> >>> >>> Roland >>> >>> >>> As such, I don't think we should start having behavioural differences for those on some instructions. >>> >> >> > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy() helper.
Quoting Kenneth Graunke (2017-08-29 22:28:38) > Anvil already had code to copy between two buffer objects in the most > efficient way possible, using large bpp copies, then smaller bpp copies. > > This patch moves that logic into BLORP as blorp_buffer_copy(), so we > can reuse it in i965 as well. Just some food for thought... The render access pattern is terrible for wide linear buffers, as the pixel block ends up being scattered across many pages, thrashing the TLB. But if instead of telling it to copy as wide as possible, you just limited it to a stride of 4096/4 (if memory serves 4 is the largest pixel block height) then each pixel block is located inside just one page (and hopefully cluster across the eu, further reducing the stride may help if the spread ends up taller than wider). -Chris ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/5] intel/isl: Set MOCS based on usage for surface states
On Tuesday, August 1, 2017 3:48:30 PM PDT Jason Ekstrand wrote: > This makes ISL now ignore the MOCS data provided by the caller and just > set it based on surface usage. > --- > src/intel/isl/isl_emit_depth_stencil.c | 12 > src/intel/isl/isl_genX_mocs.h | 53 > ++ > src/intel/isl/isl_surface_state.c | 9 +++--- > 3 files changed, 65 insertions(+), 9 deletions(-) > create mode 100644 src/intel/isl/isl_genX_mocs.h > > diff --git a/src/intel/isl/isl_emit_depth_stencil.c > b/src/intel/isl/isl_emit_depth_stencil.c > index 0d541fd..212ed88 100644 > --- a/src/intel/isl/isl_emit_depth_stencil.c > +++ b/src/intel/isl/isl_emit_depth_stencil.c > @@ -23,6 +23,9 @@ > > #include > > +#include "isl_priv.h" > +#include "isl_genX_mocs.h" > + > #define __gen_address_type uint64_t > #define __gen_user_data void > > @@ -35,8 +38,6 @@ __gen_combine_address(void *data, void *loc, uint64_t addr, > uint32_t delta) > #include "genxml/gen_macros.h" > #include "genxml/genX_pack.h" > > -#include "isl_priv.h" > - > #define __PASTE2(x, y) x ## y > #define __PASTE(x, y) __PASTE2(x, y) > #define isl_genX(x) __PASTE(isl_, genX(x)) > @@ -96,7 +97,7 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device > *dev, void *batch, > #endif >db.SurfaceBaseAddress = info->depth_address; > #if GEN_GEN >= 6 > - db.DepthBufferMOCS = info->mocs; > + db.DepthBufferMOCS = get_mocs_for_usage(ISL_SURF_USAGE_DEPTH_BIT); > #endif > > #if GEN_GEN <= 6 > @@ -140,7 +141,7 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct > isl_device *dev, void *batch, > #endif >sb.SurfaceBaseAddress = info->stencil_address; > #if GEN_GEN >= 6 > - sb.StencilBufferMOCS = info->mocs; > + sb.StencilBufferMOCS = get_mocs_for_usage(ISL_SURF_USAGE_STENCIL_BIT); > #endif >sb.SurfacePitch = info->stencil_surf->row_pitch - 1; > #if GEN_GEN >= 8 > @@ -163,7 +164,8 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct > isl_device *dev, void *batch, >db.HierarchicalDepthBufferEnable = true; > >hiz.SurfaceBaseAddress = info->hiz_address; > - hiz.HierarchicalDepthBufferMOCS = info->mocs; > + hiz.HierarchicalDepthBufferMOCS = > + get_mocs_for_usage(ISL_SURF_USAGE_HIZ_BIT); >hiz.SurfacePitch = info->hiz_surf->row_pitch - 1; > #if GEN_GEN >= 8 >/* From the SKL PRM Vol2a: > diff --git a/src/intel/isl/isl_genX_mocs.h b/src/intel/isl/isl_genX_mocs.h > new file mode 100644 > index 000..158ebec > --- /dev/null > +++ b/src/intel/isl/isl_genX_mocs.h > @@ -0,0 +1,53 @@ > +/* > + * Copyright 2017 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS > OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include > + > +#include "isl_priv.h" > + > +#include "genxml/gen_macros.h" > + > +#if GEN_GEN >= 6 > +static uint32_t > +get_mocs_for_usage(isl_surf_usage_flags_t usage) > +{ > +#if GEN_GEN == 6 > + return 0; /* PTE */ > +#elif GEN_GEN == 7 > + return 1; /* Cache in L3$, LLC and eLLC use PTE */ Using a semi-colon would be clearer: /* Cache in L3; LLC and eLLC use PTE */ otherwise it might be possible to read it as "Cache in L3, LLC, and eLLC ..." signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/5] intel/isl: Set MOCS based on view usage
On Tuesday, August 1, 2017 3:48:29 PM PDT Jason Ekstrand wrote: > This little series changes things around so that, instead of passing MOCS > values into ISL, ISL knows how to set them itself. This allows us to > centralize some of the decisions about how MOCS gets set for surfaces and > hopefully, if we ever do anything crazy in the future, we can share it > between GL and Vulkan. Unfortunately, surfaces are not the only places > where MOCS is used. It also shows up in vertex buffers, index buffers, and > streamout buffers. However those are always set to the platform equivalent > of I915_MOCS_CACHED (and that's not all that liable to change) so they're > not particularly interesting. > > If people like this approach, I'd like to Cc it to stable for 17.2 because > it has the side-effect of making Vulkan MOCS a bit more sane. > > Jason Ekstrand (5): > intel/isl: Set MOCS based on usage for surface states > intel/blorp: Delete the MOCS plumbing > i965: Stop passing MOCS information into ISL > anv: Stop passing MOCS information into ISL > intel/isl: Get rid of the mocs fields in fill/emit_info > > src/intel/blorp/blorp.h | 6 --- > src/intel/blorp/blorp_genX_exec.h| 37 +++-- > src/intel/isl/isl.h | 22 -- > src/intel/isl/isl_emit_depth_stencil.c | 12 +++--- > src/intel/isl/isl_genX_mocs.h| 53 > > src/intel/isl/isl_surface_state.c| 9 ++-- > src/intel/vulkan/anv_blorp.c | 3 -- > src/intel/vulkan/anv_device.c| 1 - > src/intel/vulkan/anv_image.c | 12 ++ > src/intel/vulkan/anv_private.h | 2 - > src/intel/vulkan/genX_cmd_buffer.c | 13 ++ > src/intel/vulkan/genX_state.c| 3 -- > src/mesa/drivers/dri/i965/brw_blorp.c| 15 --- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 26 ++-- > 14 files changed, 101 insertions(+), 113 deletions(-) > create mode 100644 src/intel/isl/isl_genX_mocs.h > > I guess this is okay. I'm not crazy about it, mostly because the logic is still spread out in a bunch of places. If there was a vertex buffer usage bit and we could handle that there, I think that would be nice, even if it doesn't otherwise seem relevant to ISL. *shrug* Having ISL do this itself rather than passing in values makes sense. Reviewed-by: Kenneth Graunke signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/6] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy() helper.
Anvil already had code to copy between two buffer objects in the most efficient way possible, using large bpp copies, then smaller bpp copies. This patch moves that logic into BLORP as blorp_buffer_copy(), so we can reuse it in i965 as well. --- src/intel/blorp/blorp.h | 6 ++ src/intel/blorp/blorp_blit.c | 128 +++ src/intel/vulkan/anv_blorp.c | 104 +++ 3 files changed, 141 insertions(+), 97 deletions(-) diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h index 4d9a44b0926..951a97f5daa 100644 --- a/src/intel/blorp/blorp.h +++ b/src/intel/blorp/blorp.h @@ -132,6 +132,12 @@ blorp_copy(struct blorp_batch *batch, uint32_t dst_x, uint32_t dst_y, uint32_t src_width, uint32_t src_height); +void +blorp_buffer_copy(struct blorp_batch *batch, + void *src_bo, uint64_t src_offset, + void *dst_bo, uint64_t dst_offset, + uint64_t size); + void blorp_fast_clear(struct blorp_batch *batch, const struct blorp_surf *surf, enum isl_format format, diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index 35008cbbb0a..998fd9b6d39 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -2513,3 +2513,131 @@ blorp_copy(struct blorp_batch *batch, do_blorp_blit(batch, ¶ms, &wm_prog_key, &coords); } + +static enum isl_format +isl_format_for_size(unsigned size_B) +{ + switch (size_B) { + case 1: return ISL_FORMAT_R8_UINT; + case 2: return ISL_FORMAT_R8G8_UINT; + case 4: return ISL_FORMAT_R8G8B8A8_UINT; + case 8: return ISL_FORMAT_R16G16B16A16_UINT; + case 16: return ISL_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Not a power-of-two format size"); + } +} + +/** + * Returns the greatest common divisor of a and b that is a power of two. + */ +static uint64_t +gcd_pow2_u64(uint64_t a, uint64_t b) +{ + assert(a > 0 || b > 0); + + unsigned a_log2 = ffsll(a) - 1; + unsigned b_log2 = ffsll(b) - 1; + + /* If either a or b is 0, then a_log2 or b_log2 till be UINT_MAX in which +* case, the MIN2() will take the other one. If both are 0 then we will +* hit the assert above. +*/ + return 1 << MIN2(a_log2, b_log2); +} + +static void +do_buffer_copy(struct blorp_batch *batch, + void *src_bo, uint64_t src_offset, + void *dst_bo, uint64_t dst_offset, + int width, int height, int block_size) +{ + /* The actual format we pick doesn't matter as blorp will throw it away. +* The only thing that actually matters is the size. +*/ + enum isl_format format = isl_format_for_size(block_size); + + UNUSED bool ok; + struct isl_surf surf; + ok = isl_surf_init(batch->blorp->isl_dev, &surf, + .dim = ISL_SURF_DIM_2D, + .format = format, + .width = width, + .height = height, + .depth = 1, + .levels = 1, + .array_len = 1, + .samples = 1, + .row_pitch = width * block_size, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_RENDER_TARGET_BIT, + .tiling_flags = ISL_TILING_LINEAR_BIT); + assert(ok); + + struct blorp_surf src_blorp_surf = { + .surf = &surf, + .addr = { + .buffer = src_bo, + .offset = src_offset, + }, + }; + + struct blorp_surf dst_blorp_surf = { + .surf = &surf, + .addr = { + .buffer = dst_bo, + .offset = dst_offset, + }, + }; + + blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0, + 0, 0, 0, 0, width, height); +} + +/* This is maximum possible width/height our HW can handle */ +#define MAX_SURFACE_DIM (1ull << 14) + +void +blorp_buffer_copy(struct blorp_batch *batch, + void *src_bo, uint64_t src_offset, + void *dst_bo, uint64_t dst_offset, + uint64_t size) +{ + uint64_t copy_size = size; + + /* First, we compute the biggest format that can be used with the +* given offsets and size. +*/ + int bs = 16; + bs = gcd_pow2_u64(bs, src_offset); + bs = gcd_pow2_u64(bs, dst_offset); + bs = gcd_pow2_u64(bs, size); + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(batch, src_bo, src_offset, dst_bo, dst_offset, + MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dst_offset += max_copy_size; + } + + /* Now make a max-width copy */ + uint64_t height = copy_size / (MAX_SURFACE_DIM * bs); + assert(height < MAX_SURFACE_DIM); + if (height != 0) { + uint64_t re
[Mesa-dev] [PATCH 4/6] i965: Add PIPE_CONTRTOL_DATA_CACHE flush to brw_emit_mi_flush().
Although we're phasing out brw_emit_mi_flush(), we still use it in some places in order to "flush everything". In a number of those places, we write data to a buffer that we may then bind as an image surface, SSBO, or atomic buffer. Those usages require us to flush the data cache. --- src/mesa/drivers/dri/i965/brw_pipe_control.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index 2a84fb8864e..53b00564b21 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -435,6 +435,7 @@ brw_emit_mi_flush(struct brw_context *brw) if (brw->gen >= 6) { flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | + PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] i965: Always flush caches after blitting to a GL buffer object.
When we blit data into a buffer object, we may need to invalidate any caches that might contain stale data, so the new data becomes visible. For example, if the buffer object is bound as a vertex buffer, we need to invalidate the vertex fetch cache. While this flushing was missing, it usually happened implicitly for non-obvious reasons: we're usually on the render ring, and calling intel_emit_linear_blit() would require switching to the BLT ring, causing an implicit flush. This likely provoked the kernel to do PIPE_CONTROLs on our behalf. Although, Gen4-5 wouldn't have this behavior. At any rate, we should do it ourselves. --- src/mesa/drivers/dri/i965/intel_buffer_objects.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index dd6fca12d0b..01443c2b7fc 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -298,6 +298,7 @@ brw_buffer_subdata(struct gl_context *ctx, intel_obj->buffer, offset, temp_bo, 0, size); + brw_emit_mi_flush(brw); brw_bo_unreference(temp_bo); mark_buffer_valid_data(intel_obj, offset, size); @@ -541,6 +542,7 @@ brw_flush_mapped_buffer_range(struct gl_context *ctx, mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset + offset, length); + brw_emit_mi_flush(brw); } @@ -570,6 +572,7 @@ brw_unmap_buffer(struct gl_context *ctx, obj->Mappings[index].Length); mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset, obj->Mappings[index].Length); + brw_emit_mi_flush(brw); } /* Since we've emitted some blits to buffers that will (likely) be used @@ -577,7 +580,6 @@ brw_unmap_buffer(struct gl_context *ctx, * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - brw_emit_mi_flush(brw); brw_bo_unreference(intel_obj->range_map_bo[index]); intel_obj->range_map_bo[index] = NULL; -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] blorp: Make blorp_buffer_copy work on Gen4-6.
Gen4-6 can only handle surfaces up to 8192. Only Gen7+ can do 16384. --- src/intel/blorp/blorp_blit.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index 998fd9b6d39..d3b51a18161 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -2594,17 +2594,18 @@ do_buffer_copy(struct blorp_batch *batch, 0, 0, 0, 0, width, height); } -/* This is maximum possible width/height our HW can handle */ -#define MAX_SURFACE_DIM (1ull << 14) - void blorp_buffer_copy(struct blorp_batch *batch, void *src_bo, uint64_t src_offset, void *dst_bo, uint64_t dst_offset, uint64_t size) { + const struct gen_device_info *devinfo = batch->blorp->isl_dev->info; uint64_t copy_size = size; + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << (devinfo->gen >= 7 ? 14 : 13); + /* First, we compute the biggest format that can be used with the * given offsets and size. */ @@ -2614,22 +2615,22 @@ blorp_buffer_copy(struct blorp_batch *batch, bs = gcd_pow2_u64(bs, size); /* First, we make a bunch of max-sized copies */ - uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; + uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; while (copy_size >= max_copy_size) { do_buffer_copy(batch, src_bo, src_offset, dst_bo, dst_offset, - MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs); + max_surface_dim, max_surface_dim, bs); copy_size -= max_copy_size; src_offset += max_copy_size; dst_offset += max_copy_size; } /* Now make a max-width copy */ - uint64_t height = copy_size / (MAX_SURFACE_DIM * bs); - assert(height < MAX_SURFACE_DIM); + uint64_t height = copy_size / (max_surface_dim * bs); + assert(height < max_surface_dim); if (height != 0) { - uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs; + uint64_t rect_copy_size = height * max_surface_dim * bs; do_buffer_copy(batch, src_bo, src_offset, dst_bo, dst_offset, - MAX_SURFACE_DIM, height, bs); + max_surface_dim, height, bs); copy_size -= rect_copy_size; src_offset += rect_copy_size; dst_offset += rect_copy_size; -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] i965: Use BLORP for buffer object stall avoidance blits instead of BLT.
Improves performance of GFXBench4 tests at 1024x768 on a Kabylake GT2: - Manhattan 3.1 by 1.32134% +/- 0.322734% (n=8). - Car Chase by 1.25607% +/- 0.291262% (n=5). --- src/mesa/drivers/dri/i965/intel_buffer_objects.c | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 01443c2b7fc..49e68bd7392 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -37,7 +37,7 @@ #include "x86/common_x86_asm.h" #include "brw_context.h" -#include "intel_blit.h" +#include "brw_blorp.h" #include "intel_buffer_objects.h" #include "intel_batchbuffer.h" #include "intel_tiled_memcpy.h" @@ -294,9 +294,9 @@ brw_buffer_subdata(struct gl_context *ctx, brw_bo_subdata(temp_bo, 0, size, data); - intel_emit_linear_blit(brw, -intel_obj->buffer, offset, + brw_blorp_copy_buffers(brw, temp_bo, 0, +intel_obj->buffer, offset, size); brw_emit_mi_flush(brw); @@ -533,11 +533,11 @@ brw_flush_mapped_buffer_range(struct gl_context *ctx, * another blit of that area and the complete newer data will land the * second time. */ - intel_emit_linear_blit(brw, - intel_obj->buffer, - obj->Mappings[index].Offset + offset, + brw_blorp_copy_buffers(brw, intel_obj->range_map_bo[index], intel_obj->map_extra[index] + offset, + intel_obj->buffer, + obj->Mappings[index].Offset + offset, length); mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset + offset, @@ -565,10 +565,10 @@ brw_unmap_buffer(struct gl_context *ctx, brw_bo_unmap(intel_obj->range_map_bo[index]); if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) { - intel_emit_linear_blit(brw, -intel_obj->buffer, obj->Mappings[index].Offset, + brw_blorp_copy_buffers(brw, intel_obj->range_map_bo[index], intel_obj->map_extra[index], +intel_obj->buffer, obj->Mappings[index].Offset, obj->Mappings[index].Length); mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset, obj->Mappings[index].Length); @@ -646,9 +646,9 @@ brw_copy_buffer_subdata(struct gl_context *ctx, dst_bo = intel_bufferobj_buffer(brw, intel_dst, write_offset, size, true); src_bo = intel_bufferobj_buffer(brw, intel_src, read_offset, size, false); - intel_emit_linear_blit(brw, - dst_bo, write_offset, - src_bo, read_offset, size); + brw_blorp_copy_buffers(brw, + src_bo, read_offset, + dst_bo, write_offset, size); /* Since we've emitted some blits to buffers that will (likely) be used * in rendering operations in other cache domains in this batch, emit a -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] i965: Add a brw_blorp_copy_buffers() command.
This exposes the new blorp_copy_buffer() functionality to i965. It should be a drop-in replacement for intel_emit_linear_blit() (other than the arguments being backwards, for consistency with BLORP). --- src/mesa/drivers/dri/i965/brw_blorp.c | 18 ++ src/mesa/drivers/dri/i965/brw_blorp.h | 8 2 files changed, 26 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 3ea396d726e..3d8203619f5 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -428,6 +428,24 @@ brw_blorp_copy_miptrees(struct brw_context *brw, dst_aux_usage); } +void +brw_blorp_copy_buffers(struct brw_context *brw, + struct brw_bo *src_bo, + unsigned src_offset, + struct brw_bo *dst_bo, + unsigned dst_offset, + unsigned size) +{ + DBG("%s %d bytes from %p[%d] to %p[%d]", + __func__, size, src_bo, src_offset, dst_bo, dst_offset); + + struct blorp_batch batch; + blorp_batch_init(&brw->blorp, &batch, brw, 0); + blorp_buffer_copy(&batch, src_bo, src_offset, dst_bo, dst_offset, size); + blorp_batch_finish(&batch); +} + + static struct intel_mipmap_tree * find_miptree(GLbitfield buffer_bit, struct intel_renderbuffer *irb) { diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index c65a68a53d3..cf781ec53cb 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -59,6 +59,14 @@ brw_blorp_copy_miptrees(struct brw_context *brw, unsigned dst_x, unsigned dst_y, unsigned src_width, unsigned src_height); +void +brw_blorp_copy_buffers(struct brw_context *brw, + struct brw_bo *src_bo, + unsigned src_offset, + struct brw_bo *dst_bo, + unsigned dst_offset, + unsigned size); + void brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb, GLbitfield mask, bool partial_clear, bool encode_srgb); -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radv: don't assert on empty hash table
Reviewed-by: Bas Nieuwenhuizen On Sun, Aug 27, 2017, at 23:44, Grazvydas Ignotas wrote: > Currently if table_size is 0, it's falling through to: > > unreachable("hash table should never be full"); > > But table_size can be 0 when RADV_DEBUG=nocache is set, or when the > table allocation fails (which is not considered an error). > > Fixes: f4e499ec791 "radv: add initial non-conformant radv vulkan driver" > Signed-off-by: Grazvydas Ignotas > --- > src/amd/vulkan/radv_pipeline_cache.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/src/amd/vulkan/radv_pipeline_cache.c > b/src/amd/vulkan/radv_pipeline_cache.c > index 99a614d..beed35b 100644 > --- a/src/amd/vulkan/radv_pipeline_cache.c > +++ b/src/amd/vulkan/radv_pipeline_cache.c > @@ -116,10 +116,13 @@ radv_pipeline_cache_search_unlocked(struct > radv_pipeline_cache *cache, > const unsigned char *sha1) > { > const uint32_t mask = cache->table_size - 1; > const uint32_t start = (*(uint32_t *) sha1); > > + if (cache->table_size == 0) > + return NULL; > + > for (uint32_t i = 0; i < cache->table_size; i++) { > const uint32_t index = (start + i) & mask; > struct cache_entry *entry = cache->hash_table[index]; > > if (!entry) > -- > 2.7.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] winsys/amdgpu: add BO to the global list only when RADEON_ALL_BOS is set
Reviewed-by: Marek Olšák Marek On Tue, Aug 29, 2017 at 4:24 PM, Samuel Pitoiset wrote: > Only useful when that debug option is enabled. > > Signed-off-by: Samuel Pitoiset > --- > src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 20 > src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 4 +--- > src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 3 +++ > src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 1 + > 4 files changed, 17 insertions(+), 11 deletions(-) > > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > index 97bbe235a4..1323be8356 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > @@ -167,10 +167,12 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf) > > assert(bo->bo && "must not be called for slab entries"); > > - mtx_lock(&bo->ws->global_bo_list_lock); > - LIST_DEL(&bo->u.real.global_list_item); > - bo->ws->num_buffers--; > - mtx_unlock(&bo->ws->global_bo_list_lock); > + if (bo->ws->debug_all_bos) { > + mtx_lock(&bo->ws->global_bo_list_lock); > + LIST_DEL(&bo->u.real.global_list_item); > + bo->ws->num_buffers--; > + mtx_unlock(&bo->ws->global_bo_list_lock); > + } > > amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); > amdgpu_va_range_free(bo->u.real.va_handle); > @@ -360,10 +362,12 @@ static void amdgpu_add_buffer_to_global_list(struct > amdgpu_winsys_bo *bo) > > assert(bo->bo); > > - mtx_lock(&ws->global_bo_list_lock); > - LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list); > - ws->num_buffers++; > - mtx_unlock(&ws->global_bo_list_lock); > + if (ws->debug_all_bos) { > + mtx_lock(&ws->global_bo_list_lock); > + LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list); > + ws->num_buffers++; > + mtx_unlock(&ws->global_bo_list_lock); > + } > } > > static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c > b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c > index 9cadfc4298..5ddde8e794 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c > @@ -903,8 +903,6 @@ static unsigned amdgpu_cs_get_buffer_list(struct > radeon_winsys_cs *rcs, > return cs->num_real_buffers; > } > > -DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false) > - > static void amdgpu_add_fence_dependency(struct amdgpu_cs *acs, > struct amdgpu_cs_buffer *buffer) > { > @@ -1097,7 +1095,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) > /* Create the buffer list. > * Use a buffer list containing all allocated buffers if requested. > */ > - if (debug_get_option_all_bos()) { > + if (ws->debug_all_bos) { >struct amdgpu_winsys_bo *bo; >amdgpu_bo_handle *handles; >unsigned num = 0; > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c > b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c > index a8b5e5b41a..5e0c1fd8d8 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c > @@ -50,6 +50,8 @@ > static struct util_hash_table *dev_tab = NULL; > static mtx_t dev_tab_mutex = _MTX_INITIALIZER_NP; > > +DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false) > + > /* Helper function to do the ioctls needed for setup and init. */ > static bool do_winsys_init(struct amdgpu_winsys *ws, int fd) > { > @@ -70,6 +72,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd) > } > > ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != > NULL; > + ws->debug_all_bos = debug_get_option_all_bos(); > > return true; > > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h > b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h > index 7aca612f45..de54470ede 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h > @@ -79,6 +79,7 @@ struct amdgpu_winsys { > ADDR_HANDLE addrlib; > > bool check_vm; > + bool debug_all_bos; > > /* List of all allocated buffers */ > mtx_t global_bo_list_lock; > -- > 2.14.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] winsys/amdgpu: set AMDGPU_GEM_CREATE_LOCAL if possible
On Tue, Aug 29, 2017 at 4:47 PM, Christian König wrote: > From: Christian König > > When the kernel supports it set the local flag and > stop adding those BOs to the BO list. > > Can probably be optimized much more. > > Signed-off-by: Christian König > --- > src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 8 > src/gallium/winsys/amdgpu/drm/amdgpu_bo.h | 2 ++ > src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 22 +- > 3 files changed, 27 insertions(+), 5 deletions(-) > > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > index 08348ed..1238d58 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > @@ -38,6 +38,10 @@ > #include > #include > > +#ifndef AMDGPU_GEM_CREATE_LOCAL > +#define AMDGPU_GEM_CREATE_LOCAL (1 << 6) > +#endif > + > /* Set to 1 for verbose output showing committed sparse buffer ranges. */ > #define DEBUG_SPARSE_COMMITS 0 > > @@ -402,6 +406,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct > amdgpu_winsys *ws, >request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; > if (flags & RADEON_FLAG_GTT_WC) >request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; > + if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && > + ws->info.drm_minor >= 20) > + request.flags |= AMDGPU_GEM_CREATE_LOCAL; > > r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); > if (r) { > @@ -435,6 +442,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct > amdgpu_winsys *ws, > bo->u.real.va_handle = va_handle; > bo->initial_domain = initial_domain; > bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1); > + bo->is_local = !!(request.flags & AMDGPU_GEM_CREATE_LOCAL); I think that "bool" doesn't need "!!" and automatically converts the value to true/false, but I'm not 100% sure. Anyway, the series is: Reviewed-by: Marek Olšák Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeonsi: update dirty_level_mask before dispatching
Reviewed-by: Marek Olšák Marek On Tue, Aug 29, 2017 at 5:37 PM, Samuel Pitoiset wrote: > This fixes a rendering issue with Hitman when bindless textures > are enabled. > > Fixes: 2263610827 ("radeonsi: flush DB caches only when transitioning from DB > to texturing") > Signed-off-by: Samuel Pitoiset > --- > src/gallium/drivers/radeon/r600_pipe_common.h | 1 + > src/gallium/drivers/radeonsi/si_compute.c | 5 + > 2 files changed, 6 insertions(+) > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h > b/src/gallium/drivers/radeon/r600_pipe_common.h > index 59886e..d76d4a1384 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.h > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h > @@ -569,6 +569,7 @@ struct r600_common_context { > unsignedgpu_reset_counter; > unsignedlast_dirty_tex_counter; > unsignedlast_compressed_colortex_counter; > + unsignedlast_num_draw_calls; > > struct threaded_context *tc; > struct u_suballocator *allocator_zeroed_memory; > diff --git a/src/gallium/drivers/radeonsi/si_compute.c > b/src/gallium/drivers/radeonsi/si_compute.c > index 3ebd22c3c1..ca334949d7 100644 > --- a/src/gallium/drivers/radeonsi/si_compute.c > +++ b/src/gallium/drivers/radeonsi/si_compute.c > @@ -782,6 +782,11 @@ static void si_launch_grid( > program->shader.compilation_failed) > return; > > + if (sctx->b.last_num_draw_calls != sctx->b.num_draw_calls) { > + si_update_fb_dirtiness_after_rendering(sctx); > + sctx->b.last_num_draw_calls = sctx->b.num_draw_calls; > + } > + > si_decompress_compute_textures(sctx); > > /* Add buffer sizes for memory checking in need_cs_space. */ > -- > 2.14.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH v2 1/2] gallivm: correct channel shift logic on big endian
Am 29.08.2017 um 18:51 schrieb Emil Velikov: > On 23 August 2017 at 21:32, Ben Crocker wrote: >> From: Ray Strode >> >> lp_build_fetch_rgba_soa fetches a texel from a texture. >> Part of that process involves first gathering the element >> together from memory into a packed format, and then breaking >> out the individual color channels into separate, parallel >> arrays. >> >> The code fails to account for endianess when reading the packed >> values. >> >> This commit attempts to correct the problem by reversing the order >> the packed values are read on big endian systems. >> >> Bugzilla: >> https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D100613&d=DwIBaQ&c=uilaK90D4TOVoH58JNXRgQ&r=_QIjpv-UJ77xEQY8fIYoQtr5qv8wKrPJc7v7_-CYAb0&m=-owfKrLkZG_-D07ad7TcBmuZ0jASKVGMIE-e-E9DLr0&s=zMHob1rOLWVOOgtwdGeaQ_qRwgTRdsKq5wrlSVRyK9o&e= >> >> Cc: "17.2" "17.1" >> >> --- >> src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 8 +++- >> 1 file changed, 7 insertions(+), 1 deletion(-) >> >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c >> b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c >> index 98eb694..22c19b1 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c >> @@ -650,7 +650,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, >> for (i = 0; i < format_desc->nr_channels; i++) { >> struct util_format_channel_description chan_desc = >> format_desc->channel[i]; >> unsigned blockbits = type.width; >> -unsigned vec_nr = chan_desc.shift / type.width; >> +unsigned vec_nr; >> + >> +#ifdef PIPE_ARCH_BIG_ENDIAN >> +vec_nr = (format_desc->block.bits - (chan_desc.shift + >> chan_desc.size)) / type.width; >> +#else >> +vec_nr = chan_desc.shift / type.width; >> +#endif >> chan_desc.shift %= type.width; >> > Roland, I think you're one of the more knowledgeable people for things > gallivm. > Can you please review this patch? Well I'm the one who regularly breaks the BE stuff :-). That said, I already reviewed this: https://lists.freedesktop.org/archives/mesa-dev/2017-August/167515.html Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes
On 29/08/17 21:18, Francisco Jerez wrote: > Chema Casanova writes: > >> El 25/08/17 a las 20:09, Francisco Jerez escribió: >>> Alejandro Piñeiro writes: >>> Although it is possible to emit them directly as AND/OR on brw_fs_nir, having specific opcodes makes it easier to remove duplicate settings later. Signed-off-by: Alejandro Piñeiro Signed-off-by: Jose Maria Casanova Crespo --- src/intel/compiler/brw_eu.h | 3 +++ src/intel/compiler/brw_eu_defines.h | 9 + src/intel/compiler/brw_eu_emit.c| 19 +++ src/intel/compiler/brw_fs_generator.cpp | 8 src/intel/compiler/brw_shader.cpp | 5 + 5 files changed, 44 insertions(+) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index a3a9c63239d..0a7f8020398 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p, struct brw_reg src, struct brw_reg idx); +void +brw_rounding_mode(struct brw_codegen *p, + enum brw_rnd_mode mode); /*** * brw_eu_util.c: */ diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 1af835d47ed..50435df2fcf 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -388,6 +388,9 @@ enum opcode { SHADER_OPCODE_TYPED_SURFACE_WRITE, SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, + SHADER_OPCODE_RND_MODE_RTE, + SHADER_OPCODE_RND_MODE_RTZ, + >>> We don't need an opcode for each possible rounding mode (there's also RU >>> and RD). How about you add a single SHADER_OPCODE_RND_MODE opcode >>> taking an immediate with the right rounding mode? >> I like the proposal. It is better having a unique opcode for setting the >> rounding mode. Changed for v2 of this patch. >>> Also, you should be marking the rounding mode opcodes as >>> has_side_effects(), because otherwise you're giving the scheduler the >>> freedom of moving your rounding mode update instruction past the >>> instruction you wanted it to have an effect on... >> Well pointed, we already realized that it was missing while debugging >> the latency problem of the control register. It was hiding the problem >> re-scheduling the cr0 modification to the beginning of the shader. SHADER_OPCODE_MEMORY_FENCE, SHADER_OPCODE_GEN4_SCRATCH_READ, @@ -1233,4 +1236,10 @@ enum brw_message_target { /* R0 */ # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT27 +enum PACKED brw_rnd_mode { + BRW_RND_MODE_UNSPECIFIED, + BRW_RND_MODE_RTE, + BRW_RND_MODE_RTZ, >>> Since you're introducing a back-end-specific rounding mode enum already, >>> why not use the hardware values right away so you avoid hard-coding >>> magic constants below. >> At the end we removed MODE_UNSPECIFIED as it isn't really needed and we >> include Round Up and Down in the enum assignation based using the PRM >> values. Also renamed RTE for RTNE to maintain coherence with nir >> conversion modifiers using the same acronym as PRM. >> >> +enum PACKED brw_rnd_mode { >> + BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ >> + BRW_RND_MODE_RU = 1,/* Round Up, toward +inf */ >> + BRW_RND_MODE_RD = 2,/* Round Down, toward -inf */ >> + BRW_RND_MODE_RTZ = 3/* Round Toward Zero */ >> +}; >> >> I have a doubt about how to avoid the magic constants to close the v2 of >> this patch. One approach would be using the same code structure and >> taking advantage using the codification of rounding field. This way we >> just formula and expect that the C compiler optimizer to guess that the >> immediate value is a constant. >> > > Hm, I'm not particularly worried about the C++ compiler behavior, it's > most likely immaterial. > >> switch (mode) { >> case BRW_RND_MODE_RTZ: >> - inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), >> brw_imm_ud(0x0030u)); >> + inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), >> brw_imm_ud(((unsigned int) mode << 4))); >>break; >> - case BRW_RND_MODE_RTE: >> - inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), >> brw_imm_ud(0xffcfu)); >> + case BRW_RND_MODE_RTNE: >> + inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), >> brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u)); >>break; >> default: >> > > I don't think this is particularly advantageous since you still need to > special-case every possible rounding mode. > >> Another approach could be to implement a general solution for all >> supported rounding modes by the hw including Round Up and Down using >> bitwise operations. >> >> /** >> * Changes the
[Mesa-dev] [PATCH 8/8] fix test makefile
--- src/util/tests/string_buffer/Makefile.am | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/util/tests/string_buffer/Makefile.am b/src/util/tests/string_buffer/Makefile.am index 60039a90d2..fdf285fc5d 100644 --- a/src/util/tests/string_buffer/Makefile.am +++ b/src/util/tests/string_buffer/Makefile.am @@ -23,12 +23,16 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/util \ $(DEFINES) -LDADD = \ +string_buffer_test_SOURCES = \ + append_and_print.cpp + +string_buffer_test_CFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/util \ + $(DEFINES) \ + $(PTHREAD_CFLAGS) + +string_buffer_test_LDADD = \ $(top_builddir)/src/util/libmesautil.la \ $(PTHREAD_LIBS) \ $(DLOPEN_LIBS) - -TESTS = append_and_print \ - $() - -check_PROGRAMS = $(TESTS) -- 2.13.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/8] glcpp: Use string_buffer for line continuation removal
Migrate removal of line continuations to string_buffer. Before this it used ralloc_strncat() to append strings, which internally each time calculates strlen() of its argument. Its argument is entire shader, so it multiple time scans the whole shader text. Signed-off-by: Vladislav Egorov V2: Adapt to different API of string buffer (Thomas Helland) --- src/compiler/glsl/glcpp/pp.c | 25 ++--- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/compiler/glsl/glcpp/pp.c b/src/compiler/glsl/glcpp/pp.c index 40e0f033d2..c3827e439f 100644 --- a/src/compiler/glsl/glcpp/pp.c +++ b/src/compiler/glsl/glcpp/pp.c @@ -97,17 +97,25 @@ skip_newline (const char *str) return ret; } +/* Initial output buffer size, 4096 minus ralloc() overhead. It was selected + * to minimize total amount of allocated memory during shader-db run. + */ +#define INITIAL_PP_OUTPUT_BUF_SIZE 4048 + /* Remove any line continuation characters in the shader, (whether in * preprocessing directives or in GLSL code). */ static char * remove_line_continuations(glcpp_parser_t *ctx, const char *shader) { - char *clean = ralloc_strdup(ctx, ""); + struct _mesa_string_buffer *sb = + _mesa_string_buffer_create(ctx, INITIAL_PP_OUTPUT_BUF_SIZE); + const char *backslash, *newline, *search_start; const char *cr, *lf; char newline_separator[3]; int collapsed_newlines = 0; + int separator_len; backslash = strchr(shader, '\\'); @@ -153,6 +161,7 @@ remove_line_continuations(glcpp_parser_t *ctx, const char *shader) newline_separator[0] = '\n'; newline_separator[1] = '\r'; } + separator_len = strlen(newline_separator); while (true) { /* If we have previously collapsed any line-continuations, @@ -172,10 +181,12 @@ remove_line_continuations(glcpp_parser_t *ctx, const char *shader) if (newline && (backslash == NULL || newline < backslash)) { - ralloc_strncat(&clean, shader, - newline - shader + 1); + _mesa_string_buffer_append_len(sb, shader, + newline - shader + 1); while (collapsed_newlines) { - ralloc_strcat(&clean, newline_separator); + _mesa_string_buffer_append_len(sb, + newline_separator, + separator_len); collapsed_newlines--; } shader = skip_newline (newline); @@ -196,7 +207,7 @@ remove_line_continuations(glcpp_parser_t *ctx, const char *shader) if (backslash[1] == '\r' || backslash[1] == '\n') { collapsed_newlines++; - ralloc_strncat(&clean, shader, backslash - shader); + _mesa_string_buffer_append_len(sb, shader, backslash - shader); shader = skip_newline (backslash + 1); search_start = shader; } @@ -204,9 +215,9 @@ remove_line_continuations(glcpp_parser_t *ctx, const char *shader) backslash = strchr(search_start, '\\'); } - ralloc_strcat(&clean, shader); + _mesa_string_buffer_append(sb, shader); - return clean; + return sb->buf; } int -- 2.13.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/8] glcpp: Use Bloom filter before identifier search
From: Vladislav Egorov Absolute majority of identifiers in shaders are not macro references. Many shaders don't use preprocessing macros at all. Almost all queries to parser->defines hash-table will be unsuccessful. Note that all predefined macros start either with "GL_" or with "__". Moreover, even user-defined macros are usually use upper case, while ordinary identifiers use lower case. It's easy to detect macros just looking on the first two characters of their name. So it makes sense to make a quick identifier lookup in a Bloom filter before making relatively expensive hash-table search call. Many schemes of Bloom are possible, but it seems that two mini 64 bit Bloom tables -- one for the 1st identifier's char, one for the second identifier's char -- with one trivial hash-function (6 lowest bits of the char) work very well. Considering that Latin-1 alphabetic chars and _ occupy positions from 65 to 122, every character will have its bit position in 64-bit word, distinguishing lower and upper case. Some games do things like "#define float4 float", this scheme would not help here, it barely improves heavy users of preprocessor like Talos Principle, but in general it works pretty well. In my shader-db it eliminates 90%+ hash table queries during preprocessing stage. One 64-bit Bloom table working on only 1st or only 2nd char will eliminate ~70-80% queries. For simplicity use the same scheme for both 32-bit and 64-bit archs. 32-bit archs still have pretty efficients ways to do 64-bit shifts. --- src/compiler/glsl/glcpp/glcpp-parse.y | 24 ++-- src/compiler/glsl/glcpp/glcpp.h | 10 ++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index debf12bef8..19a6df2015 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -1359,6 +1359,11 @@ glcpp_parser_create(const struct gl_extensions *extension_list, glcpp_lex_init_extra (parser, &parser->scanner); parser->defines = _mesa_hash_table_create(NULL, _mesa_key_hash_string, _mesa_key_string_equal); + parser->defines_bloom0 = 0; + parser->defines_bloom1 = 0; + GLCPP_DEFINES_BLOOM_PUT(parser, "__LINE__"); + GLCPP_DEFINES_BLOOM_PUT(parser, "__FILE__"); + parser->linalloc = linear_alloc_parent(parser, 0); parser->active = NULL; parser->lexing_directive = 0; @@ -1845,6 +1850,9 @@ _glcpp_parser_expand_node(glcpp_parser_t *parser, token_node_t *node, *last = node; identifier = token->value.str; + if (!GLCPP_DEFINES_BLOOM_GET(parser, identifier)) + return NULL; + /* Special handling for __LINE__ and __FILE__, (not through * the hash table). */ if (*identifier == '_') { @@ -2131,6 +2139,7 @@ _define_object_macro(glcpp_parser_t *parser, YYLTYPE *loc, glcpp_error (loc, parser, "Redefinition of macro %s\n", identifier); } + GLCPP_DEFINES_BLOOM_PUT(parser, identifier); _mesa_hash_table_insert (parser->defines, identifier, macro); } @@ -2166,6 +2175,7 @@ _define_function_macro(glcpp_parser_t *parser, YYLTYPE *loc, glcpp_error (loc, parser, "Redefinition of macro %s\n", identifier); } + GLCPP_DEFINES_BLOOM_PUT(parser, identifier); _mesa_hash_table_insert(parser->defines, identifier, macro); } @@ -2212,12 +,14 @@ glcpp_parser_lex(YYSTYPE *yylval, YYLTYPE *yylloc, glcpp_parser_t *parser) ret == ENDIF || ret == HASH_TOKEN) { parser->in_control_line = 1; } else if (ret == IDENTIFIER) { - struct hash_entry *entry = _mesa_hash_table_search(parser->defines, -yylval->str); - macro_t *macro = entry ? entry->data : NULL; - if (macro && macro->is_function) { -parser->newline_as_space = 1; -parser->paren_count = 0; + if (GLCPP_DEFINES_BLOOM_GET(parser, yylval->str)) { +struct hash_entry *entry = _mesa_hash_table_search(parser->defines, + yylval->str); +macro_t *macro = entry ? entry->data : NULL; +if (macro && macro->is_function) { + parser->newline_as_space = 1; + parser->paren_count = 0; +} } } diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h index 9d997cd924..b2f6f7a2a3 100644 --- a/src/compiler/glsl/glcpp/glcpp.h +++ b/src/compiler/glsl/glcpp/glcpp.h @@ -37,6 +37,14 @@ #define yyscan_t void* +#define GLCPP_DEFINES_BLOOM_GET(parser, identifier) \ + (parser)->defines_bloom0) >> ((identifier)[0] & 63)) & 1) & \ +parser)->defines_bloom1) >> ((identifier)[1] & 63)) & 1)) + +#define GLCPP_DEFINES_BLOOM_PUT(parser, identifier) \ + (parser)->defines_bloom0 |= (uint64_t)1 << ((identifier)[0] & 63); \ + (parser)->defin
[Mesa-dev] [PATCH 2/8] util: Add tests for the string buffer
More tests could probably be added, but this should cover concatenation, resizing, clearing, formatted printing, and checking the length, so it should be quite complete. V2: Address review feedback from Timothy, plus fixes - Use a large enough char array - Actually test the formatted appending - Test that clear function resets string length --- configure.ac| 2 + src/util/Makefile.am| 3 +- src/util/tests/string_buffer/Makefile.am| 34 + src/util/tests/string_buffer/append_and_print.c | 99 + 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 src/util/tests/string_buffer/Makefile.am create mode 100644 src/util/tests/string_buffer/append_and_print.c diff --git a/configure.ac b/configure.ac index 53d52f6d52..a0d7a67956 100644 --- a/configure.ac +++ b/configure.ac @@ -2933,9 +2933,11 @@ AC_CONFIG_FILES([Makefile src/mesa/main/tests/Makefile src/util/Makefile src/util/tests/hash_table/Makefile + src/util/tests/string_buffer/Makefile src/util/xmlpool/Makefile src/vulkan/Makefile]) + AC_OUTPUT # Fix up dependencies in *.Plo files, where we changed the extension of a diff --git a/src/util/Makefile.am b/src/util/Makefile.am index 4512dc99d5..7000202a36 100644 --- a/src/util/Makefile.am +++ b/src/util/Makefile.am @@ -19,7 +19,8 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -SUBDIRS = xmlpool . tests/hash_table +SUBDIRS = . tests/hash_table \ + . tests/string_buffer include Makefile.sources diff --git a/src/util/tests/string_buffer/Makefile.am b/src/util/tests/string_buffer/Makefile.am new file mode 100644 index 00..60039a90d2 --- /dev/null +++ b/src/util/tests/string_buffer/Makefile.am @@ -0,0 +1,34 @@ +# Copyright © 2009 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/util \ + $(DEFINES) + +LDADD = \ + $(top_builddir)/src/util/libmesautil.la \ + $(PTHREAD_LIBS) \ + $(DLOPEN_LIBS) + +TESTS = append_and_print \ + $() + +check_PROGRAMS = $(TESTS) diff --git a/src/util/tests/string_buffer/append_and_print.c b/src/util/tests/string_buffer/append_and_print.c new file mode 100644 index 00..71b857892f --- /dev/null +++ b/src/util/tests/string_buffer/append_and_print.c @@ -0,0 +1,99 @@ +/* + * Copyright © 2017 Thomas Helland + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include "string_buffer.h" + +int +main(int argc, char **argv) +{ + (void) argc; + (void) argv; + + struct _mesa_string_buffer *buf;
[Mesa-dev] [PATCH 3/8] glsl: Change the parser to use the string buffer
V2: Pointed out by Timothy - Fix pp.c reralloc size issue and comment V3 - Use vprintf instead of printf where we should - Fixes failing make-check tests --- src/compiler/glsl/glcpp/glcpp-parse.y | 195 ++ src/compiler/glsl/glcpp/glcpp.h | 8 +- src/compiler/glsl/glcpp/pp.c | 39 +++ 3 files changed, 78 insertions(+), 164 deletions(-) diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 898a26044f..debf12bef8 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -209,12 +209,7 @@ line: | SPACE control_line | text_line { _glcpp_parser_print_expanded_token_list (parser, $1); - const char *newline_str = "\n"; - size_t size = strlen(newline_str); - - ralloc_str_append(&parser->output, newline_str, - parser->output_length, size); - parser->output_length += size; + _mesa_string_buffer_append_char(parser->output, '\n'); } | expanded_line ; @@ -233,20 +228,16 @@ expanded_line: | LINE_EXPANDED integer_constant NEWLINE { parser->has_new_line_number = 1; parser->new_line_number = $2; - ralloc_asprintf_rewrite_tail (&parser->output, - &parser->output_length, - "#line %" PRIiMAX "\n", - $2); + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2); } | LINE_EXPANDED integer_constant integer_constant NEWLINE { parser->has_new_line_number = 1; parser->new_line_number = $2; parser->has_new_source_number = 1; parser->new_source_number = $3; - ralloc_asprintf_rewrite_tail (&parser->output, - &parser->output_length, - "#line %" PRIiMAX " %" PRIiMAX "\n", - $2, $3); + _mesa_string_buffer_printf(parser->output, + "#line %" PRIiMAX " %" PRIiMAX "\n", + $2, $3); } ; @@ -264,12 +255,7 @@ define: control_line: control_line_success { - const char *newline_str = "\n"; - size_t size = strlen(newline_str); - - ralloc_str_append(&parser->output, newline_str, - parser->output_length, size); - parser->output_length += size; + _mesa_string_buffer_append_char(parser->output, '\n'); } | control_line_error | HASH_TOKEN LINE pp_tokens NEWLINE { @@ -459,7 +445,7 @@ control_line_success: glcpp_parser_resolve_implicit_version(parser); } | HASH_TOKEN PRAGMA NEWLINE { - ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "#%s", $2); + _mesa_string_buffer_printf(parser->output, "#%s", $2); } ; @@ -1137,133 +1123,61 @@ _token_list_equal_ignoring_space(token_list_t *a, token_list_t *b) } static void -_token_print(char **out, size_t *len, token_t *token) +_token_print(struct _mesa_string_buffer *out, token_t *token) { if (token->type < 256) { - size_t size = sizeof(char); - - ralloc_str_append(out, (char *) &token->type, *len, size); - *len += size; + _mesa_string_buffer_printf(out,"%c", token->type); return; } switch (token->type) { case INTEGER: - ralloc_asprintf_rewrite_tail (out, len, "%" PRIiMAX, token->value.ival); + _mesa_string_buffer_printf(out, "%" PRIiMAX, token->value.ival); break; case IDENTIFIER: case INTEGER_STRING: - case OTHER: { - size_t size = strlen(token->value.str); - - ralloc_str_append(out, token->value.str, *len, size); - *len += size; + case OTHER: + _mesa_string_buffer_append(out, token->value.str); break; - } - case SPACE: { - const char *token_str = " "; - size_t size = strlen(token_str); - - ralloc_str_append(out, token_str, *len, size); - *len += size; + case SPACE: + _mesa_string_buffer_append(out, " "); break; - } - case LEFT_SHIFT: { - const char *token_str = "<<"; - size_t size = strlen(token_str); - - ralloc_str_append(out, token_str, *len, size); - *len += size; + case LEFT_SHIFT: + _mesa_string_buffer_append(out, "<<"); break; - } - case RIGHT_SHIFT: { - const char *token_str = ">>"; - size_t size = strlen(token_str); - - ralloc_str_append(out, token_str, *len, size); - *len += size; + case RIGHT_SHIFT: + _mesa_string_buffer_append(out, ">>"); break;
[Mesa-dev] [PATCH 7/8] port to gtest
--- src/util/tests/string_buffer/append_and_print.c | 99 -- src/util/tests/string_buffer/append_and_print.cpp | 221 ++ 2 files changed, 221 insertions(+), 99 deletions(-) delete mode 100644 src/util/tests/string_buffer/append_and_print.c create mode 100644 src/util/tests/string_buffer/append_and_print.cpp diff --git a/src/util/tests/string_buffer/append_and_print.c b/src/util/tests/string_buffer/append_and_print.c deleted file mode 100644 index 71b857892f..00 --- a/src/util/tests/string_buffer/append_and_print.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright © 2017 Thomas Helland - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include -#include -#include -#include -#include "string_buffer.h" - -int -main(int argc, char **argv) -{ - (void) argc; - (void) argv; - - struct _mesa_string_buffer *buf; - char *str1 = "test1"; - char *str2 = "test2"; - char *str3 = "test1test2"; - char str4[80]; - char str5[40]; - - buf = _mesa_string_buffer_create(NULL, 6); - - /* The string terminator needs one byte, so there should be five left */ - assert(_mesa_string_buffer_space_left(buf) == 5); - - _mesa_string_buffer_append(buf, str1); - - /* The string should now be full */ - assert(_mesa_string_buffer_space_left(buf) == 0); - - /* The content should be equal */ - assert(strcmp(buf->buf, str1) == 0); - - /* Add more, so that the string is resized */ - _mesa_string_buffer_append(buf, str2); - - /* The size of the buffer should have been doubled. -* We have 5 + 5 + 1 characters, so there should be one left. -*/ - assert(_mesa_string_buffer_space_left(buf) == 1); - - /* The string should now be equal to str3 */ - assert(strcmp(buf->buf, str3) == 0); - - /* Compile a string with some formatting */ - sprintf(str4, "Testing formatting %d, %f", 100, 1.0); - - /* Clear the string buffer first */ - _mesa_string_buffer_clear(buf); - - /* Check that the length of the string is reset */ - assert(buf->length == 0); - - /* Then test printing some formatted text */ - _mesa_string_buffer_printf(buf, "Testing formatting %d, %f", 100, 1.0); - - /* The string should now be equal to str4 */ - assert(strcmp(buf->buf, &str4) == 0); - - /* Compile a string with some other formatting */ - sprintf(str5, "Testing formatting %d, %x", 100, 0xDEADBEAF); - - /* Concatenate str5 to str4 */ - strcat(str4, str5); - - /* Now use the formatted append function */ - _mesa_string_buffer_printf(buf, "Testing formatting %d, %x", 100, 0xDEADBEAF); - - /* The string buffer should now be equal to str4 */ - assert(strcmp(buf->buf, &str4) == 0); - - /* Finally, clean up after us */ - _mesa_string_buffer_destroy(buf); - - return 0; -} diff --git a/src/util/tests/string_buffer/append_and_print.cpp b/src/util/tests/string_buffer/append_and_print.cpp new file mode 100644 index 00..c3ccd8f9d1 --- /dev/null +++ b/src/util/tests/string_buffer/append_and_print.cpp @@ -0,0 +1,221 @@ +/* + * Copyright © 2017 Thomas Helland + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFR
[Mesa-dev] [PATCH 5/8] glcpp: Avoid unnecessary call to strlen
Length of the token was already calculated by flex and stored in yyleng, no need to implicitly call strlen() via linear_strdup(). --- src/compiler/glsl/glcpp/glcpp-lex.l | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l b/src/compiler/glsl/glcpp/glcpp-lex.l index 381b97364a..93e5cb3845 100644 --- a/src/compiler/glsl/glcpp/glcpp-lex.l +++ b/src/compiler/glsl/glcpp/glcpp-lex.l @@ -101,7 +101,8 @@ void glcpp_set_column (int column_no , yyscan_t yyscanner); #define RETURN_STRING_TOKEN(token) \ do {\ if (! parser->skipping) { \ - yylval->str = linear_strdup(yyextra->linalloc, yytext); \ + yylval->str = linear_alloc_child(yyextra->linalloc, yyleng + 1);\ + memcpy(yylval->str, yytext, yyleng + 1);\ RETURN_TOKEN_NEVER_SKIP (token);\ } \ } while(0) -- 2.13.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/8] Resend of preprocessor series
This is a resend of the string buffer implementation and related patches sent out back in May. I've done one more change to the string buffer; using u_string.h for a compatible vsnprintf version to reduce the code even more. I've not been able to test this due to two build breakages (xmlpool and dri) that I'm still trying to figure out of. But since I promised to send these out this evening, I'm sending them untested. I did test them thoroughly the last time around though, so I believe it should be mostly good as long as I haven't messed up the rebasing. I believe the string buffer part of the series is the most important; the rest I've not really gotten around to performance test much. Thomas Helland (7): util: Add a string buffer implementation util: Add tests for the string buffer glsl: Change the parser to use the string buffer glcpp: Use string_buffer for line continuation removal glcpp: Avoid unnecessary call to strlen port to gtest fix test makefile Vladislav Egorov (1): glcpp: Use Bloom filter before identifier search configure.ac | 2 + src/compiler/glsl/glcpp/glcpp-lex.l | 3 +- src/compiler/glsl/glcpp/glcpp-parse.y | 219 - src/compiler/glsl/glcpp/glcpp.h | 18 +- src/compiler/glsl/glcpp/pp.c | 64 --- src/util/Makefile.am | 3 +- src/util/Makefile.sources | 2 + src/util/string_buffer.c | 155 +++ src/util/string_buffer.h | 87 + src/util/tests/string_buffer/Makefile.am | 38 src/util/tests/string_buffer/append_and_print.cpp | 221 ++ 11 files changed, 633 insertions(+), 179 deletions(-) create mode 100644 src/util/string_buffer.c create mode 100644 src/util/string_buffer.h create mode 100644 src/util/tests/string_buffer/Makefile.am create mode 100644 src/util/tests/string_buffer/append_and_print.cpp -- 2.13.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/8] util: Add a string buffer implementation
Based on Vladislav Egorovs work on the preprocessor, but split out to a util functionality that should be universal. Setup, teardown, memory handling and general layout is modeled around the hash_table and the set, to make it familiar for everyone. A notable change is that this implementation is always null terminated. The rationale is that it will be less error-prone, as one might access the buffer directly, thereby reading a non-terminated string. Also, vsnprintf and friends prints the null-terminator. V2: Address review feedback from Timothy and Grazvydas - Fix MINGW preprocessor check - Changed len from uint to int - Make string argument const in append function - Move to header and inline append function - Add crimp_to_fit function for resizing buffer V3: Move include of ralloc to string_buffer.h V4: Use u_string.h for a cross-platform working vsnprintf --- src/util/Makefile.sources | 2 + src/util/string_buffer.c | 155 ++ src/util/string_buffer.h | 87 ++ 3 files changed, 244 insertions(+) create mode 100644 src/util/string_buffer.c create mode 100644 src/util/string_buffer.h diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources index 4ed4e39f03..c7f6516a99 100644 --- a/src/util/Makefile.sources +++ b/src/util/Makefile.sources @@ -37,6 +37,8 @@ MESA_UTIL_FILES := \ simple_list.h \ slab.c \ slab.h \ + string_buffer.c \ + string_buffer.h \ strndup.h \ strtod.c \ strtod.h \ diff --git a/src/util/string_buffer.c b/src/util/string_buffer.c new file mode 100644 index 00..e2fa612976 --- /dev/null +++ b/src/util/string_buffer.c @@ -0,0 +1,155 @@ +/* + * Copyright © 2017 Thomas Helland + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ +#include "string_buffer.h" + +static bool +ensure_capacity(struct _mesa_string_buffer *str, uint32_t needed_capacity) +{ + if (needed_capacity <= str->capacity) + return true; + + /* Too small, we have to resize our buffer. +* Double until we can fit the new string. +*/ + uint32_t new_capacity = str->capacity * 2; + while (needed_capacity > new_capacity) + new_capacity *= 2; + + str->buf = reralloc_array_size(str, str->buf, sizeof(char), new_capacity); + if (str->buf == NULL) + return false; + + str->capacity = new_capacity; + return true; +} + +struct _mesa_string_buffer * +_mesa_string_buffer_create(void *mem_ctx, uint32_t initial_capacity) +{ + struct _mesa_string_buffer *str; + str = ralloc(mem_ctx, struct _mesa_string_buffer); + + if (str == NULL) + return NULL; + + /* If no initial capacity is set then set it to something */ + str->capacity = initial_capacity ? initial_capacity : 8; + str->buf = ralloc_array(str, char, str->capacity); + str->length = 0; + str->buf[str->length] = '\0'; + return str; +} + +void +_mesa_string_buffer_destroy(struct _mesa_string_buffer *str) +{ + ralloc_free(str); +} + +bool +_mesa_string_buffer_append_all(struct _mesa_string_buffer *str, + uint32_t num_args, ...) +{ + int i; + char* s; + va_list args; + va_start(args, num_args); + for (i = 0; i < num_args; i++) { + s = va_arg(args, char*); + if (!_mesa_string_buffer_append_len(str, s, strlen(s))) + return false; + } + va_end(args); + return true; +} + +bool +_mesa_string_buffer_append_len(struct _mesa_string_buffer *str, + const char *c, uint32_t len) +{ + uint32_t needed_length = str->length + len + 1; + if (!ensure_capacity(str, needed_length)) + return false; + + memcpy(str->buf + str->length, c, len); + str->length += len; + str->buf[str->length] = '\0'; + return true; +} + +void +_mesa_string_buffer_clear(struct _mesa_string_buffer *str) +{ + str->length = 0; + str->buf[
Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes
Francisco Jerez writes: > Chema Casanova writes: > >> El 25/08/17 a las 20:09, Francisco Jerez escribió: >>> Alejandro Piñeiro writes: >>> Although it is possible to emit them directly as AND/OR on brw_fs_nir, having specific opcodes makes it easier to remove duplicate settings later. Signed-off-by: Alejandro Piñeiro Signed-off-by: Jose Maria Casanova Crespo --- src/intel/compiler/brw_eu.h | 3 +++ src/intel/compiler/brw_eu_defines.h | 9 + src/intel/compiler/brw_eu_emit.c| 19 +++ src/intel/compiler/brw_fs_generator.cpp | 8 src/intel/compiler/brw_shader.cpp | 5 + 5 files changed, 44 insertions(+) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index a3a9c63239d..0a7f8020398 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p, struct brw_reg src, struct brw_reg idx); +void +brw_rounding_mode(struct brw_codegen *p, + enum brw_rnd_mode mode); /*** * brw_eu_util.c: */ diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 1af835d47ed..50435df2fcf 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -388,6 +388,9 @@ enum opcode { SHADER_OPCODE_TYPED_SURFACE_WRITE, SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, + SHADER_OPCODE_RND_MODE_RTE, + SHADER_OPCODE_RND_MODE_RTZ, + >>> We don't need an opcode for each possible rounding mode (there's also RU >>> and RD). How about you add a single SHADER_OPCODE_RND_MODE opcode >>> taking an immediate with the right rounding mode? >> I like the proposal. It is better having a unique opcode for setting the >> rounding mode. Changed for v2 of this patch. >>> Also, you should be marking the rounding mode opcodes as >>> has_side_effects(), because otherwise you're giving the scheduler the >>> freedom of moving your rounding mode update instruction past the >>> instruction you wanted it to have an effect on... >> Well pointed, we already realized that it was missing while debugging >> the latency problem of the control register. It was hiding the problem >> re-scheduling the cr0 modification to the beginning of the shader. SHADER_OPCODE_MEMORY_FENCE, SHADER_OPCODE_GEN4_SCRATCH_READ, @@ -1233,4 +1236,10 @@ enum brw_message_target { /* R0 */ # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT27 +enum PACKED brw_rnd_mode { + BRW_RND_MODE_UNSPECIFIED, + BRW_RND_MODE_RTE, + BRW_RND_MODE_RTZ, >>> Since you're introducing a back-end-specific rounding mode enum already, >>> why not use the hardware values right away so you avoid hard-coding >>> magic constants below. >> At the end we removed MODE_UNSPECIFIED as it isn't really needed and we >> include Round Up and Down in the enum assignation based using the PRM >> values. Also renamed RTE for RTNE to maintain coherence with nir >> conversion modifiers using the same acronym as PRM. >> >> +enum PACKED brw_rnd_mode { >> + BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ >> + BRW_RND_MODE_RU = 1, /* Round Up, toward +inf */ >> + BRW_RND_MODE_RD = 2, /* Round Down, toward -inf */ >> + BRW_RND_MODE_RTZ = 3 /* Round Toward Zero */ >> +}; >> >> I have a doubt about how to avoid the magic constants to close the v2 of >> this patch. One approach would be using the same code structure and >> taking advantage using the codification of rounding field. This way we >> just formula and expect that the C compiler optimizer to guess that the >> immediate value is a constant. >> > > Hm, I'm not particularly worried about the C++ compiler behavior, it's > most likely immaterial. > >> switch (mode) { >> case BRW_RND_MODE_RTZ: >> - inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), >> brw_imm_ud(0x0030u)); >> + inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), >> brw_imm_ud(((unsigned int) mode << 4))); >> break; >> - case BRW_RND_MODE_RTE: >> - inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), >> brw_imm_ud(0xffcfu)); >> + case BRW_RND_MODE_RTNE: >> + inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), >> brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u)); >> break; >> default: >> > > I don't think this is particularly advantageous since you still need to > special-case every possible rounding mode. > >> Another approach could be to implement a general solution for all >> supported rounding modes by the hw including Round Up and Down using >> bitwise operations. >> >> /** >> * Changes the floating point rounding
Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes
Chema Casanova writes: > El 25/08/17 a las 20:09, Francisco Jerez escribió: >> Alejandro Piñeiro writes: >> >>> Although it is possible to emit them directly as AND/OR on brw_fs_nir, >>> having specific opcodes makes it easier to remove duplicate settings >>> later. >>> >>> Signed-off-by: Alejandro Piñeiro >>> Signed-off-by: Jose Maria Casanova Crespo >>> --- >>> src/intel/compiler/brw_eu.h | 3 +++ >>> src/intel/compiler/brw_eu_defines.h | 9 + >>> src/intel/compiler/brw_eu_emit.c| 19 +++ >>> src/intel/compiler/brw_fs_generator.cpp | 8 >>> src/intel/compiler/brw_shader.cpp | 5 + >>> 5 files changed, 44 insertions(+) >>> >>> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h >>> index a3a9c63239d..0a7f8020398 100644 >>> --- a/src/intel/compiler/brw_eu.h >>> +++ b/src/intel/compiler/brw_eu.h >>> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p, >>>struct brw_reg src, >>>struct brw_reg idx); >>> >>> +void >>> +brw_rounding_mode(struct brw_codegen *p, >>> + enum brw_rnd_mode mode); >>> /*** >>> * brw_eu_util.c: >>> */ >>> diff --git a/src/intel/compiler/brw_eu_defines.h >>> b/src/intel/compiler/brw_eu_defines.h >>> index 1af835d47ed..50435df2fcf 100644 >>> --- a/src/intel/compiler/brw_eu_defines.h >>> +++ b/src/intel/compiler/brw_eu_defines.h >>> @@ -388,6 +388,9 @@ enum opcode { >>> SHADER_OPCODE_TYPED_SURFACE_WRITE, >>> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, >>> >>> + SHADER_OPCODE_RND_MODE_RTE, >>> + SHADER_OPCODE_RND_MODE_RTZ, >>> + >> We don't need an opcode for each possible rounding mode (there's also RU >> and RD). How about you add a single SHADER_OPCODE_RND_MODE opcode >> taking an immediate with the right rounding mode? > I like the proposal. It is better having a unique opcode for setting the > rounding mode. Changed for v2 of this patch. >> Also, you should be marking the rounding mode opcodes as >> has_side_effects(), because otherwise you're giving the scheduler the >> freedom of moving your rounding mode update instruction past the >> instruction you wanted it to have an effect on... > Well pointed, we already realized that it was missing while debugging > the latency problem of the control register. It was hiding the problem > re-scheduling the cr0 modification to the beginning of the shader. >>> SHADER_OPCODE_MEMORY_FENCE, >>> >>> SHADER_OPCODE_GEN4_SCRATCH_READ, >>> @@ -1233,4 +1236,10 @@ enum brw_message_target { >>> /* R0 */ >>> # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 >>> >>> +enum PACKED brw_rnd_mode { >>> + BRW_RND_MODE_UNSPECIFIED, >>> + BRW_RND_MODE_RTE, >>> + BRW_RND_MODE_RTZ, >> Since you're introducing a back-end-specific rounding mode enum already, >> why not use the hardware values right away so you avoid hard-coding >> magic constants below. > At the end we removed MODE_UNSPECIFIED as it isn't really needed and we > include Round Up and Down in the enum assignation based using the PRM > values. Also renamed RTE for RTNE to maintain coherence with nir > conversion modifiers using the same acronym as PRM. > > +enum PACKED brw_rnd_mode { > + BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ > + BRW_RND_MODE_RU = 1, /* Round Up, toward +inf */ > + BRW_RND_MODE_RD = 2, /* Round Down, toward -inf */ > + BRW_RND_MODE_RTZ = 3 /* Round Toward Zero */ > +}; > > I have a doubt about how to avoid the magic constants to close the v2 of > this patch. One approach would be using the same code structure and > taking advantage using the codification of rounding field. This way we > just formula and expect that the C compiler optimizer to guess that the > immediate value is a constant. > Hm, I'm not particularly worried about the C++ compiler behavior, it's most likely immaterial. > switch (mode) { > case BRW_RND_MODE_RTZ: > - inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), > brw_imm_ud(0x0030u)); > + inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), > brw_imm_ud(((unsigned int) mode << 4))); > break; > - case BRW_RND_MODE_RTE: > - inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), > brw_imm_ud(0xffcfu)); > + case BRW_RND_MODE_RTNE: > + inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), > brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u)); > break; > default: > I don't think this is particularly advantageous since you still need to special-case every possible rounding mode. > Another approach could be to implement a general solution for all > supported rounding modes by the hw including Round Up and Down using > bitwise operations. > > /** > * Changes the floating point rounding mode updating the control register > * field defined at cr0.0[5-6] bits. This function supports the changes to > * RTNE (00), RU (01), RD (10) and RTZ (11
Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes
El 25/08/17 a las 20:09, Francisco Jerez escribió: > Alejandro Piñeiro writes: > >> Although it is possible to emit them directly as AND/OR on brw_fs_nir, >> having specific opcodes makes it easier to remove duplicate settings >> later. >> >> Signed-off-by: Alejandro Piñeiro >> Signed-off-by: Jose Maria Casanova Crespo >> --- >> src/intel/compiler/brw_eu.h | 3 +++ >> src/intel/compiler/brw_eu_defines.h | 9 + >> src/intel/compiler/brw_eu_emit.c| 19 +++ >> src/intel/compiler/brw_fs_generator.cpp | 8 >> src/intel/compiler/brw_shader.cpp | 5 + >> 5 files changed, 44 insertions(+) >> >> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h >> index a3a9c63239d..0a7f8020398 100644 >> --- a/src/intel/compiler/brw_eu.h >> +++ b/src/intel/compiler/brw_eu.h >> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p, >>struct brw_reg src, >>struct brw_reg idx); >> >> +void >> +brw_rounding_mode(struct brw_codegen *p, >> + enum brw_rnd_mode mode); >> /*** >> * brw_eu_util.c: >> */ >> diff --git a/src/intel/compiler/brw_eu_defines.h >> b/src/intel/compiler/brw_eu_defines.h >> index 1af835d47ed..50435df2fcf 100644 >> --- a/src/intel/compiler/brw_eu_defines.h >> +++ b/src/intel/compiler/brw_eu_defines.h >> @@ -388,6 +388,9 @@ enum opcode { >> SHADER_OPCODE_TYPED_SURFACE_WRITE, >> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, >> >> + SHADER_OPCODE_RND_MODE_RTE, >> + SHADER_OPCODE_RND_MODE_RTZ, >> + > We don't need an opcode for each possible rounding mode (there's also RU > and RD). How about you add a single SHADER_OPCODE_RND_MODE opcode > taking an immediate with the right rounding mode? I like the proposal. It is better having a unique opcode for setting the rounding mode. Changed for v2 of this patch. > Also, you should be marking the rounding mode opcodes as > has_side_effects(), because otherwise you're giving the scheduler the > freedom of moving your rounding mode update instruction past the > instruction you wanted it to have an effect on... Well pointed, we already realized that it was missing while debugging the latency problem of the control register. It was hiding the problem re-scheduling the cr0 modification to the beginning of the shader. >> SHADER_OPCODE_MEMORY_FENCE, >> >> SHADER_OPCODE_GEN4_SCRATCH_READ, >> @@ -1233,4 +1236,10 @@ enum brw_message_target { >> /* R0 */ >> # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 >> >> +enum PACKED brw_rnd_mode { >> + BRW_RND_MODE_UNSPECIFIED, >> + BRW_RND_MODE_RTE, >> + BRW_RND_MODE_RTZ, > Since you're introducing a back-end-specific rounding mode enum already, > why not use the hardware values right away so you avoid hard-coding > magic constants below. At the end we removed MODE_UNSPECIFIED as it isn't really needed and we include Round Up and Down in the enum assignation based using the PRM values. Also renamed RTE for RTNE to maintain coherence with nir conversion modifiers using the same acronym as PRM. +enum PACKED brw_rnd_mode { + BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ + BRW_RND_MODE_RU = 1, /* Round Up, toward +inf */ + BRW_RND_MODE_RD = 2, /* Round Down, toward -inf */ + BRW_RND_MODE_RTZ = 3 /* Round Toward Zero */ +}; I have a doubt about how to avoid the magic constants to close the v2 of this patch. One approach would be using the same code structure and taking advantage using the codification of rounding field. This way we just formula and expect that the C compiler optimizer to guess that the immediate value is a constant. switch (mode) { case BRW_RND_MODE_RTZ: - inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0x0030u)); + inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(((unsigned int) mode << 4))); break; - case BRW_RND_MODE_RTE: - inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0xffcfu)); + case BRW_RND_MODE_RTNE: + inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u)); break; default: Another approach could be to implement a general solution for all supported rounding modes by the hw including Round Up and Down using bitwise operations. /** * Changes the floating point rounding mode updating the control register * field defined at cr0.0[5-6] bits. This function supports the changes to * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations. * Only RTNE and RTZ rounding are enabled at nir. */ void brw_rounding_mode(struct brw_codegen *p, enum brw_rnd_mode mode) { const unsigned int mask = 0x30u; const unsigned int enable_bits = ((unsigned int) mode) << 4; const unsigned int disable_bits = enable_bits | ~mask; /* Used by RTNE rounding to set field
[Mesa-dev] [Bug 102454] glibc 2.26 doesn't provide anymore xlocale.h
https://bugs.freedesktop.org/show_bug.cgi?id=102454 --- Comment #1 from Emil Velikov --- The patch in the bug report isn't quite right (aka remove the header and guards all together). Instead one should add a configure check - AC_LINK_IFELSE() ... which: - includes xlocale.h reusing the HAVE_XLOCALE_H guard - references strtod_l w/o any guard, setting a macro HAVE_FOO Then update the code - swap the __GNU_SOURCE guards through the file with HAVE_FOO - HAVE_XLOCALE_H should guard _only_ the header inclusion -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/10] egl/wayland: remove dri2_surf width/height double init.
On 29 August 2017 17:33:57 BST, Emil Velikov wrote: > On 29 August 2017 at 15:40, Eric Engestrom > wrote: > > On Sunday, 2017-08-27 11:20:33 +0100, Emil Velikov wrote: > >> From: Emil Velikov > >> > >> The dimensions are already set [to 0 or the value provided by the > >> attributes list] by the _eglInitSurface() call further up. > >> > >> The values are updated, as the DRI driver calls the > DRI2/IMAGE_LOADER' > >> get_buffers, shortly before making use of the values. > >> > >> Signed-off-by: Emil Velikov > >> --- > >> src/egl/drivers/dri2/platform_wayland.c | 3 --- > >> 1 file changed, 3 deletions(-) > >> > >> diff --git a/src/egl/drivers/dri2/platform_wayland.c > b/src/egl/drivers/dri2/platform_wayland.c > >> index f9554fccde9..89e94e11f4f 100644 > >> --- a/src/egl/drivers/dri2/platform_wayland.c > >> +++ b/src/egl/drivers/dri2/platform_wayland.c > >> @@ -198,9 +198,6 @@ dri2_wl_create_window_surface(_EGLDriver *drv, > _EGLDisplay *disp, > >> dri2_surf->wl_win->private = dri2_surf; > >> dri2_surf->wl_win->destroy_window_callback = > destroy_window_callback; > >> > >> - dri2_surf->base.Width = -1; > >> - dri2_surf->base.Height = -1; > >> - > > > > We actually have a local patch here that replaces this `-1` > > initialisation with the dimensions of the wayland `window`: > > > > 8< > > diff --git a/src/egl/drivers/dri2/platform_wayland.c > b/src/egl/drivers/dri2/platform_wayland.c > > index 85a11f4..7719407 100644 > > --- a/src/egl/drivers/dri2/platform_wayland.c > > +++ b/src/egl/drivers/dri2/platform_wayland.c > > @@ -183,8 +183,8 @@ dri2_wl_create_window_surface(_EGLDriver *drv, > _EGLDisplay *disp, > > dri2_surf->wl_win->private = dri2_surf; > > dri2_surf->wl_win->destroy_window_callback = > destroy_window_callback; > > > > - dri2_surf->base.Width = -1; > > - dri2_surf->base.Height = -1; > > + dri2_surf->base.Width = window->width; > > + dri2_surf->base.Height = window->height; > > > > config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT, > > dri2_surf->base.GLColorspace); > > >8 > > > > This apparently fixes dEQP crashes caused by an assert: > > deRandom.hpp:93: Unknown function: Assertion `min <= max' > failed. > > > > No dEQP test is mentioned on our patch, but I think this happened on > > dEQP-EGL.functional.resize.surface_size.* > > > Having another look, we seems to be rather inconsistent when we set > them - see below. > At the same time, I doubt any !X11 dEQP resize tests work: > - iirc there's no wayland/drm/surfaceless support > - none of the three has a .query_surface hook > > -Emil > > X11/dri2 > - create_*_surface, all but !pbuffer > - query_surface via xcb_get_geometry > > X11/dri3: > - create_*_surface, all > - query_surface via xcb_get_geometry > > drm > - create_*_surface, all (window only) > - query_surface - no hook > > surfaceless > - create_*_surface, none (pbuffer only) > - query_surface - no hook > > wayland > - create_*_surface, none (window only) > - query_surface - no hook Oh, uh… I think this might also be in a local patch ^^ We have a bunch of patches like that, that I want to upstream, but not enough bandwidth and too many high priority tasks lining up. I'll try and have a look tomorrow for the query_surface hook. Cheers, Eric ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/19] anv: Use GNU C empty brace initializer
On 29 August 2017 18:11:45 BST, Matt Turner wrote: > On Tue, Aug 29, 2017 at 3:11 AM, Eric Engestrom > wrote: > > On Monday, 2017-08-28 14:57:13 -0700, Matt Turner wrote: > >> Avoids Clang's warning about the current code: > >> > >>warning: suggest braces around initialization of subobject > > > > I'm not sure about this patch [1], but everything else in this > series is: > > Reviewed-by: Eric Engestrom > > > > The "mark `UNUSED` because they are unused in some generations" > patches > > might be better to mark as `MAYBE_UNUSED`; they both resolve to the > same > > thing for compilers, but the latter is gives more information to > humans > > (ie. "this isn't always unused"). > > > > [1] > > Can you test this with GCC 4.x and 7.x? If memory serves, this init > format > > caused warnings on either old GCC or new GCC. > > I think `= {{0}};` was the only format that both compilers were > happy with. > > Tested 4.9.4 and 7.1.0. No warnings from either of them about these > initializers :) Given this and Emil's comment, this patch is r-b me with the rest of the series :) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/19] anv: Use GNU C empty brace initializer
On 29 August 2017 at 18:10, Matt Turner wrote: > On Tue, Aug 29, 2017 at 3:35 AM, Emil Velikov > wrote: >> On 29 August 2017 at 11:11, Eric Engestrom wrote: >>> On Monday, 2017-08-28 14:57:13 -0700, Matt Turner wrote: Avoids Clang's warning about the current code: warning: suggest braces around initialization of subobject >>> >>> I'm not sure about this patch [1], but everything else in this series is: >> IIRC "{}" is the least likely way to avoid the warnings, across >> GCC/Clang versions. >> It's not part of the C spec, but since it works - not sure how much to >> bother. > > I'm surprised to hear you say that. I've tested with gcc-4.9.4, 6.4.0, > 7.1.0; and clang-4.0, and none of them warn about {} > Eric corrected me on IRC, but I forgot to mention here - "s/least/most/" > I think when we went through this before (in NIR) we couldn't use {} > because it needs to compile with MSVC, and MSVC doesn't allow that in > C. > Precisely. The "most" portable solution that I know of is memset(). >> Would be great to have the issue reported (and fixed) in Clang though. >> AFAICT both {0} and {0,} are valid, if memory serves me right. > > I don't really understand the nuances of {0}, {0,}, {{0}}, etc, and I > get the sense that that's the case for nearly everyone else as well. > AFAICT {0} and {0,} should just work everywhere, modulo bugs. [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119 [2] https://bugs.llvm.org/show_bug.cgi?id=21689 > The most prominent bug I see in all of this is that {} isn't part of > standard C (it is part of standard C++!) Agreed. Not sure how easy it will be to convince the committee. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/19] anv: Use GNU C empty brace initializer
On Tue, Aug 29, 2017 at 3:11 AM, Eric Engestrom wrote: > On Monday, 2017-08-28 14:57:13 -0700, Matt Turner wrote: >> Avoids Clang's warning about the current code: >> >>warning: suggest braces around initialization of subobject > > I'm not sure about this patch [1], but everything else in this series is: > Reviewed-by: Eric Engestrom > > The "mark `UNUSED` because they are unused in some generations" patches > might be better to mark as `MAYBE_UNUSED`; they both resolve to the same > thing for compilers, but the latter is gives more information to humans > (ie. "this isn't always unused"). > > [1] > Can you test this with GCC 4.x and 7.x? If memory serves, this init format > caused warnings on either old GCC or new GCC. > I think `= {{0}};` was the only format that both compilers were happy with. Tested 4.9.4 and 7.1.0. No warnings from either of them about these initializers :) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/19] anv: Use GNU C empty brace initializer
On Tue, Aug 29, 2017 at 3:35 AM, Emil Velikov wrote: > On 29 August 2017 at 11:11, Eric Engestrom wrote: >> On Monday, 2017-08-28 14:57:13 -0700, Matt Turner wrote: >>> Avoids Clang's warning about the current code: >>> >>>warning: suggest braces around initialization of subobject >> >> I'm not sure about this patch [1], but everything else in this series is: > IIRC "{}" is the least likely way to avoid the warnings, across > GCC/Clang versions. > It's not part of the C spec, but since it works - not sure how much to bother. I'm surprised to hear you say that. I've tested with gcc-4.9.4, 6.4.0, 7.1.0; and clang-4.0, and none of them warn about {} I think when we went through this before (in NIR) we couldn't use {} because it needs to compile with MSVC, and MSVC doesn't allow that in C. > Would be great to have the issue reported (and fixed) in Clang though. > AFAICT both {0} and {0,} are valid, if memory serves me right. I don't really understand the nuances of {0}, {0,}, {{0}}, etc, and I get the sense that that's the case for nearly everyone else as well. The most prominent bug I see in all of this is that {} isn't part of standard C (it is part of standard C++!) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH v2 1/2] gallivm: correct channel shift logic on big endian
On 23 August 2017 at 21:32, Ben Crocker wrote: > From: Ray Strode > > lp_build_fetch_rgba_soa fetches a texel from a texture. > Part of that process involves first gathering the element > together from memory into a packed format, and then breaking > out the individual color channels into separate, parallel > arrays. > > The code fails to account for endianess when reading the packed > values. > > This commit attempts to correct the problem by reversing the order > the packed values are read on big endian systems. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100613 > Cc: "17.2" "17.1" > > --- > src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 8 +++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c > index 98eb694..22c19b1 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c > @@ -650,7 +650,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, > for (i = 0; i < format_desc->nr_channels; i++) { > struct util_format_channel_description chan_desc = > format_desc->channel[i]; > unsigned blockbits = type.width; > -unsigned vec_nr = chan_desc.shift / type.width; > +unsigned vec_nr; > + > +#ifdef PIPE_ARCH_BIG_ENDIAN > +vec_nr = (format_desc->block.bits - (chan_desc.shift + > chan_desc.size)) / type.width; > +#else > +vec_nr = chan_desc.shift / type.width; > +#endif > chan_desc.shift %= type.width; > Roland, I think you're one of the more knowledgeable people for things gallivm. Can you please review this patch? -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/10] egl/wayland: remove dri2_surf width/height double init.
On 29 August 2017 at 15:40, Eric Engestrom wrote: > On Sunday, 2017-08-27 11:20:33 +0100, Emil Velikov wrote: >> From: Emil Velikov >> >> The dimensions are already set [to 0 or the value provided by the >> attributes list] by the _eglInitSurface() call further up. >> >> The values are updated, as the DRI driver calls the DRI2/IMAGE_LOADER' >> get_buffers, shortly before making use of the values. >> >> Signed-off-by: Emil Velikov >> --- >> src/egl/drivers/dri2/platform_wayland.c | 3 --- >> 1 file changed, 3 deletions(-) >> >> diff --git a/src/egl/drivers/dri2/platform_wayland.c >> b/src/egl/drivers/dri2/platform_wayland.c >> index f9554fccde9..89e94e11f4f 100644 >> --- a/src/egl/drivers/dri2/platform_wayland.c >> +++ b/src/egl/drivers/dri2/platform_wayland.c >> @@ -198,9 +198,6 @@ dri2_wl_create_window_surface(_EGLDriver *drv, >> _EGLDisplay *disp, >> dri2_surf->wl_win->private = dri2_surf; >> dri2_surf->wl_win->destroy_window_callback = destroy_window_callback; >> >> - dri2_surf->base.Width = -1; >> - dri2_surf->base.Height = -1; >> - > > We actually have a local patch here that replaces this `-1` > initialisation with the dimensions of the wayland `window`: > > 8< > diff --git a/src/egl/drivers/dri2/platform_wayland.c > b/src/egl/drivers/dri2/platform_wayland.c > index 85a11f4..7719407 100644 > --- a/src/egl/drivers/dri2/platform_wayland.c > +++ b/src/egl/drivers/dri2/platform_wayland.c > @@ -183,8 +183,8 @@ dri2_wl_create_window_surface(_EGLDriver *drv, > _EGLDisplay *disp, > dri2_surf->wl_win->private = dri2_surf; > dri2_surf->wl_win->destroy_window_callback = destroy_window_callback; > > - dri2_surf->base.Width = -1; > - dri2_surf->base.Height = -1; > + dri2_surf->base.Width = window->width; > + dri2_surf->base.Height = window->height; > > config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT, > dri2_surf->base.GLColorspace); > >8 > > This apparently fixes dEQP crashes caused by an assert: > deRandom.hpp:93: Unknown function: Assertion `min <= max' failed. > > No dEQP test is mentioned on our patch, but I think this happened on > dEQP-EGL.functional.resize.surface_size.* > Having another look, we seems to be rather inconsistent when we set them - see below. At the same time, I doubt any !X11 dEQP resize tests work: - iirc there's no wayland/drm/surfaceless support - none of the three has a .query_surface hook -Emil X11/dri2 - create_*_surface, all but !pbuffer - query_surface via xcb_get_geometry X11/dri3: - create_*_surface, all - query_surface via xcb_get_geometry drm - create_*_surface, all (window only) - query_surface - no hook surfaceless - create_*_surface, none (pbuffer only) - query_surface - no hook wayland - create_*_surface, none (window only) - query_surface - no hook ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] i965: Refactor brw_draw_prims.
brw_draw_prims needs to be refactored prior to ARB_indirect_parameters implementation. Signed-off-by: Plamena Manolova --- src/mesa/drivers/dri/i965/brw_draw.c | 343 +++ 1 file changed, 189 insertions(+), 154 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index a8ad2ac..7597bae 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -531,7 +531,7 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) if (!irb) continue; - + brw_render_cache_set_add_bo(brw, irb->mt->bo); intel_miptree_finish_render(brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count, @@ -594,21 +594,163 @@ brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw) * fallback conditions. */ static void -brw_try_draw_prims(struct gl_context *ctx, - const struct gl_vertex_array *arrays[], - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - bool index_bounds_valid, - GLuint min_index, - GLuint max_index, - struct brw_transform_feedback_object *xfb_obj, - unsigned stream, - struct gl_buffer_object *indirect) +brw_try_draw_prim(struct gl_context *ctx, + const struct gl_vertex_array *arrays[], + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + bool index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct brw_transform_feedback_object *xfb_obj, + unsigned stream, + struct gl_buffer_object *indirect) { struct brw_context *brw = brw_context(ctx); - GLuint i; bool fail_next = false; + int estimated_max_prim_size; + const int sampler_state_size = 16; + + estimated_max_prim_size = 512; /* batchbuffer commands */ + estimated_max_prim_size += BRW_MAX_TEX_UNIT * + (sampler_state_size + sizeof(struct gen5_sampler_default_color)); + estimated_max_prim_size += 1024; /* gen6 VS push constants */ + estimated_max_prim_size += 1024; /* gen6 WM push constants */ + estimated_max_prim_size += 512; /* misc. pad */ + + /* Flag BRW_NEW_DRAW_CALL on every draw. This allows us to have +* atoms that happen on every draw call. +*/ + brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL; + + /* Flush the batch if it's approaching full, so that we don't wrap while +* we've got validated state that needs to be in the same batch as the +* primitives. +*/ + intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING); + intel_batchbuffer_save_state(brw); + + if (brw->num_instances != prim->num_instances || + brw->basevertex != prim->basevertex || + brw->baseinstance != prim->base_instance) { + brw->num_instances = prim->num_instances; + brw->basevertex = prim->basevertex; + brw->baseinstance = prim->base_instance; + if (prim->draw_id > 0) { /* For draw_id == 0 we just did this before the loop */ + brw->ctx.NewDriverState |= BRW_NEW_VERTICES; + brw_merge_inputs(brw, arrays); + } + } + + /* Determine if we need to flag BRW_NEW_VERTICES for updating the +* gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we +* always flag if the shader uses one of the values. For direct draws, +* we only flag if the values change. +*/ + const int new_basevertex = + prim->indexed ? prim->basevertex : prim->start; + const int new_baseinstance = prim->base_instance; + const struct brw_vs_prog_data *vs_prog_data = + brw_vs_prog_data(brw->vs.base.prog_data); + if (prim->draw_id > 0) { + const bool uses_draw_parameters = + vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance; + + if ((uses_draw_parameters && prim->is_indirect) || + (vs_prog_data->uses_basevertex && + brw->draw.params.gl_basevertex != new_basevertex) || + (vs_prog_data->uses_baseinstance && + brw->draw.params.gl_baseinstance != new_baseinstance)) + brw->ctx.NewDriverState |= BRW_NEW_VERTICES; + } + + brw->draw.params.gl_basevertex = new_basevertex; + brw->draw.params.gl_baseinstance = new_baseinstance; + brw_bo_unreference(brw->draw.draw_params_bo); + + if (prim->is_indirect) { + /* Point draw_params_bo at the indirect buffer. */ + brw->draw.draw_params_bo = + intel_buffer_object(ctx->DrawIndirectBuffer)->buffer; + brw_bo_reference(brw->draw.draw_params_bo); + brw->draw.draw_params_offset = + prim->indirect_offset + (prim->indexed ? 12 : 8); + } else { + /* Set draw_params_bo to NULL so brw_prep
[Mesa-dev] [PATCH 2/2] i965: Implement ARB_indirect_parameters
We can implement ARB_indirect_parameters for i965 by taking advantage of the conditional rendering mechanism. This works by issuing maxdrawcount draw calls and using conditional rendering to predicate each of them with "drawcount > gl_DrawID" Signed-off-by: Plamena Manolova --- src/mesa/drivers/dri/i965/brw_context.h | 3 + src/mesa/drivers/dri/i965/brw_draw.c | 97 src/mesa/drivers/dri/i965/brw_draw.h | 11 src/mesa/drivers/dri/i965/intel_extensions.c | 2 + 4 files changed, 113 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 2274fe5..4639d5b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -831,6 +831,9 @@ struct brw_context int gl_drawid; struct brw_bo *draw_id_bo; uint32_t draw_id_offset; + + struct brw_bo *draw_params_count_bo; + uint32_t draw_params_count_offset; } draw; struct { diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 7597bae..473958c 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -820,6 +820,11 @@ brw_end_draw_prims(struct gl_context *ctx, brw_program_cache_check_size(brw); brw_postdraw_reconcile_align_wa_slices(brw); brw_postdraw_set_buffers_need_resolve(brw); + + if (brw->draw.draw_params_count_bo) { + brw_bo_unreference(brw->draw.draw_params_count_bo); + brw->draw.draw_params_count_bo = NULL; + } } void @@ -837,6 +842,8 @@ brw_draw_prims(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); const struct gl_vertex_array **arrays = ctx->Array._DrawArrays; int i; + int predicate_state = brw->predicate.state; + int combine_op = MI_PREDICATE_COMBINEOP_SET; struct brw_transform_feedback_object *xfb_obj = (struct brw_transform_feedback_object *) gl_xfb_obj; @@ -890,12 +897,101 @@ brw_draw_prims(struct gl_context *ctx, * to it. */ + if (brw->draw.draw_params_count_bo && + predicate_state == BRW_PREDICATE_STATE_USE_BIT) { + /* We do this to empty the MI_PREDICATE_DATA register */ + BEGIN_BATCH(4); + OUT_BATCH(MI_PREDICATE_DATA); + OUT_BATCH(0u); + OUT_BATCH(MI_PREDICATE_DATA + 4); + OUT_BATCH(0u); + ADVANCE_BATCH(); + + combine_op = MI_PREDICATE_COMBINEOP_AND; + } + for (i = 0; i < nr_prims; i++) { + if (brw->draw.draw_params_count_bo) { + struct brw_bo *draw_id_bo = brw_bo_alloc(brw->bufmgr, "draw_id", 4, 4); + + brw_bo_reference(draw_id_bo); + brw_bo_subdata(draw_id_bo, 0, 4, &prims[i].draw_id); + + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); + + brw_load_register_mem(brw, MI_PREDICATE_SRC0, + brw->draw.draw_params_count_bo, + brw->draw.draw_params_count_offset); + brw_load_register_mem(brw, MI_PREDICATE_SRC1, draw_id_bo, 0); + + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + MI_PREDICATE_LOADOP_LOADINV | combine_op | + MI_PREDICATE_COMPAREOP_DELTAS_EQUAL); + ADVANCE_BATCH(); + + brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; + + brw_bo_unreference(draw_id_bo); + } + brw_try_draw_prim(ctx, arrays, &prims[i], ib, index_bounds_valid, min_index, max_index, xfb_obj, stream, indirect); } + brw_end_draw_prims(ctx, arrays, prims, nr_prims, ib, index_bounds_valid, min_index, max_index, xfb_obj, stream, indirect); + + brw->predicate.state = predicate_state; +} + +void +brw_draw_indirect_prims(struct gl_context *ctx, +GLuint mode, +struct gl_buffer_object *indirect_data, +GLsizeiptr indirect_offset, +unsigned draw_count, +unsigned stride, +struct gl_buffer_object *indirect_params, +GLsizeiptr indirect_params_offset, +const struct _mesa_index_buffer *ib) +{ + struct brw_context *brw = brw_context(ctx); + struct _mesa_prim *prim; + GLsizei i; + + prim = calloc(draw_count, sizeof(*prim)); + if (prim == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sDraw%sIndirect%s", + (draw_count > 1) ? "Multi" : "", + ib ? "Elements" : "Arrays", + indirect_params ? "CountARB" : ""); + return; + } + + prim[0].begin = 1; + prim[draw_count - 1].end = 1; + for (i = 0; i < draw_count; ++i, indirect_offset += stride) { + prim[i].mode = mode; + prim[i].indexed = !!ib; + prim[i].indirect_offset = indirect_offset; + prim[i].is_indirect = 1; + prim[i].draw_id = i; + } + + if (indirect_params) { + brw
Re: [Mesa-dev] [PATCH 01/19] configure: Add and use AX_CHECK_COMPILE_FLAG
On Tue, Aug 29, 2017 at 6:15 AM, Emil Velikov wrote: > On 28 August 2017 at 22:57, Matt Turner wrote: > >> -# Flags to help ensure that certain portions of the code -- and only >> those >> -# portions -- can be built with MSVC: >> -# - src/util, src/gallium/auxiliary, rc/gallium/drivers/llvmpipe, and >> -# - non-Linux/Posix OpenGL portions needs to build on MSVC 2013 (which >> -# supports most of C99) >> -# - the rest has no compiler compiler restrictions >> -MSVC2013_COMPAT_CFLAGS="-Werror=pointer-arith" >> -MSVC2013_COMPAT_CXXFLAGS="-Werror=pointer-arith" >> - > Can you please preserve the comment about the MSVC2013 flags? > With that the series is > Reviewed-by: Emil Velikov Yes, thank you. > Mildly curious: > Did you pipe the series through ezbench for perf testing? > Wondering if the inline and fabsf -> babs changes made any noticeable impact. No, I cannot imagine it could cause any change. It's really just changing when a float -> double conversion happens. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] swr: Report format max_samples=1 to maintain support for "fake" msaa.
Gentle bump. This fixes mesa master for Kitware's VTK testing. > On Aug 25, 2017, at 2:59 PM, Cherniak, Bruce wrote: > > Accompanying patch "st/mesa: only try to create 1x msaa surfaces for > 'fake' msaa" requires driver to report max_samples=1 to enable "fake" > msaa. Previously, 0 and 1 were treated equivalently in st_init_extensions() > and either could enable "fake" msaa. > > This patch raises the swr default msaa_max_count from 0 to 1, so that > swr_is_format_supported will report max_samples=1. > > Real msaa can still be enabled by exporting SWR_MSAA_MAX_COUNT with a > pow2 value between 2 and 16. > > This patch is necessary to prevent an OpenSWR regression resulting from > the st/mesa patch. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102038 > --- > src/gallium/drivers/swr/swr_screen.cpp | 22 +++--- > 1 file changed, 11 insertions(+), 11 deletions(-) > > diff --git a/src/gallium/drivers/swr/swr_screen.cpp > b/src/gallium/drivers/swr/swr_screen.cpp > index 3287bc6fee..cc8d9955b8 100644 > --- a/src/gallium/drivers/swr/swr_screen.cpp > +++ b/src/gallium/drivers/swr/swr_screen.cpp > @@ -255,13 +255,13 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap > param) > return 1; > >/* MSAA support > -* If user has explicitly set max_sample_count = 0 (via > SWR_MSAA_MAX_COUNT) > -* then disable all MSAA support and go back to old caps. */ > +* If user has explicitly set max_sample_count = 1 (via > SWR_MSAA_MAX_COUNT) > +* then disable all MSAA support and go back to old (FAKE_SW_MSAA) caps. > */ >case PIPE_CAP_TEXTURE_MULTISAMPLE: >case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: > - return swr_screen(screen)->msaa_max_count ? 1 : 0; > + return (swr_screen(screen)->msaa_max_count > 1) ? 1 : 0; >case PIPE_CAP_FAKE_SW_MSAA: > - return swr_screen(screen)->msaa_max_count ? 0 : 1; > + return (swr_screen(screen)->msaa_max_count > 1) ? 0 : 1; > > /* unsupported features */ >case PIPE_CAP_ANISOTROPIC_FILTER: > @@ -1079,22 +1079,22 @@ swr_validate_env_options(struct swr_screen *screen) > screen->client_copy_limit = client_copy_limit; > >/* XXX msaa under development, disable by default for now */ > - screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */ > + screen->msaa_max_count = 1; /* was SWR_MAX_NUM_MULTISAMPLES; */ > >/* validate env override values, within range and power of 2 */ > - int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 0); > - if (msaa_max_count) { > - if ((msaa_max_count < 0) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES) > + int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 1); > + if (msaa_max_count != 1) { > + if ((msaa_max_count < 1) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES) > || !util_is_power_of_two(msaa_max_count)) { > fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count); > fprintf(stderr, "must be power of 2 between 1 and %d" \ > - " (or 0 to disable msaa)\n", > + " (or 1 to disable msaa)\n", >SWR_MAX_NUM_MULTISAMPLES); > - msaa_max_count = 0; > + msaa_max_count = 1; > } > > fprintf(stderr, "SWR_MSAA_MAX_COUNT: %d\n", msaa_max_count); > - if (!msaa_max_count) > + if (msaa_max_count == 1) > fprintf(stderr, "(msaa disabled)\n"); > > screen->msaa_max_count = msaa_max_count; > -- > 2.11.0 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] st/mesa: only try to create 1x msaa surfaces for "fake" msaa drivers
Gentle bump. This fixes mesa master for Kitware's VTK testing. > On Aug 25, 2017, at 2:59 PM, Bruce Cherniak wrote: > > From: Brian Paul > > For software drivers where we want "fake" msaa support for GL 3.x, we > treat 1 sample as being msaa. > > For drivers with real msaa support, start format probing at 2x msaa. > For drivers with fake msaa support, start format probing at 1x msaa. > > This also tweaks the MaxSamples code in st_init_extensions() so that > we use MaxSamples=1 for fake msaa. This allows the format proble loops > to run at least one iteration. > > This fixes a llvmpipe/VTK regression from commit 6839d3369905eb02151. > And for drivers with fake msaa support, calls such as > glTexImage2DMultisample(samples=1) will now succeed. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102038 > --- > src/mesa/state_tracker/st_cb_fbo.c | 13 ++--- > src/mesa/state_tracker/st_cb_texture.c | 11 --- > src/mesa/state_tracker/st_extensions.c | 14 ++ > 3 files changed, 24 insertions(+), 14 deletions(-) > > diff --git a/src/mesa/state_tracker/st_cb_fbo.c > b/src/mesa/state_tracker/st_cb_fbo.c > index afc7700306..a7c286bcc5 100644 > --- a/src/mesa/state_tracker/st_cb_fbo.c > +++ b/src/mesa/state_tracker/st_cb_fbo.c > @@ -155,12 +155,19 @@ st_renderbuffer_alloc_storage(struct gl_context * ctx, > * to and no more than the next larger sample count supported > * by the implementation. > * > -* So let's find the supported number of samples closest to NumSamples. > +* Find the supported number of samples >= rb->NumSamples > */ >if (rb->NumSamples > 0) { > - unsigned i; > + unsigned start, i; > > - for (i = MAX2(2, rb->NumSamples); i <= ctx->Const.MaxSamples; i++) { > + if (ctx->Const.MaxSamples > 1 && rb->NumSamples == 1) { > + /* don't try num_samples = 1 with drivers that support real msaa */ > + start = 2; > + } else { > + start = rb->NumSamples; > + } > + > + for (i = start; i <= ctx->Const.MaxSamples; i++) { > format = st_choose_renderbuffer_format(st, internalFormat, i); > > if (format != PIPE_FORMAT_NONE) { > diff --git a/src/mesa/state_tracker/st_cb_texture.c > b/src/mesa/state_tracker/st_cb_texture.c > index af2052db24..b5006b05a7 100644 > --- a/src/mesa/state_tracker/st_cb_texture.c > +++ b/src/mesa/state_tracker/st_cb_texture.c > @@ -2739,13 +2739,18 @@ st_texture_storage(struct gl_context *ctx, > >bindings = default_bindings(st, fmt); > > - /* Raise the sample count if the requested one is unsupported. */ >if (num_samples > 0) { > + /* Find msaa sample count which is actually supported. For example, > + * if the user requests 1x but only 4x or 8x msaa is supported, we'll > + * choose 4x here. > + */ > enum pipe_texture_target ptarget = gl_target_to_pipe(texObj->Target); > boolean found = FALSE; > > - /* start the query with at least two samples */ > - num_samples = MAX2(num_samples, 2); > + if (ctx->Const.MaxSamples > 1 && num_samples == 1) { > + /* don't try num_samples = 1 with drivers that support real msaa */ > + num_samples = 2; > + } > > for (; num_samples <= ctx->Const.MaxSamples; num_samples++) { > if (screen->is_format_supported(screen, fmt, ptarget, > diff --git a/src/mesa/state_tracker/st_extensions.c > b/src/mesa/state_tracker/st_extensions.c > index 904d9cd834..2008e28250 100644 > --- a/src/mesa/state_tracker/st_extensions.c > +++ b/src/mesa/state_tracker/st_extensions.c > @@ -1046,17 +1046,15 @@ void st_init_extensions(struct pipe_screen *screen, > void_formats, 32, > PIPE_BIND_RENDER_TARGET); >} > - if (consts->MaxSamples == 1) { > - /* one sample doesn't really make sense */ > - consts->MaxSamples = 0; > - } > - else if (consts->MaxSamples >= 2) { > + > + if (consts->MaxSamples >= 2) { > + /* Real MSAA support */ > extensions->EXT_framebuffer_multisample = GL_TRUE; > extensions->EXT_framebuffer_multisample_blit_scaled = GL_TRUE; >} > - > - if (consts->MaxSamples == 0 && > - screen->get_param(screen, PIPE_CAP_FAKE_SW_MSAA)) { > + else if (consts->MaxSamples > 0 && > +screen->get_param(screen, PIPE_CAP_FAKE_SW_MSAA)) { > + /* fake MSAA support */ > consts->FakeSWMSAA = GL_TRUE; > extensions->EXT_framebuffer_multisample = GL_TRUE; > extensions->EXT_framebuffer_multisample_blit_scaled = GL_TRUE; > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeonsi: update dirty_level_mask before dispatching
This fixes a rendering issue with Hitman when bindless textures are enabled. Fixes: 2263610827 ("radeonsi: flush DB caches only when transitioning from DB to texturing") Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/drivers/radeonsi/si_compute.c | 5 + 2 files changed, 6 insertions(+) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 59886e..d76d4a1384 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -569,6 +569,7 @@ struct r600_common_context { unsignedgpu_reset_counter; unsignedlast_dirty_tex_counter; unsignedlast_compressed_colortex_counter; + unsignedlast_num_draw_calls; struct threaded_context *tc; struct u_suballocator *allocator_zeroed_memory; diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 3ebd22c3c1..ca334949d7 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -782,6 +782,11 @@ static void si_launch_grid( program->shader.compilation_failed) return; + if (sctx->b.last_num_draw_calls != sctx->b.num_draw_calls) { + si_update_fb_dirtiness_after_rendering(sctx); + sctx->b.last_num_draw_calls = sctx->b.num_draw_calls; + } + si_decompress_compute_textures(sctx); /* Add buffer sizes for memory checking in need_cs_space. */ -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] winsys/amdgpu: set AMDGPU_GEM_CREATE_LOCAL if possible
From: Christian König When the kernel supports it set the local flag and stop adding those BOs to the BO list. Can probably be optimized much more. Signed-off-by: Christian König --- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 8 src/gallium/winsys/amdgpu/drm/amdgpu_bo.h | 2 ++ src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 22 +- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 08348ed..1238d58 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -38,6 +38,10 @@ #include #include +#ifndef AMDGPU_GEM_CREATE_LOCAL +#define AMDGPU_GEM_CREATE_LOCAL (1 << 6) +#endif + /* Set to 1 for verbose output showing committed sparse buffer ranges. */ #define DEBUG_SPARSE_COMMITS 0 @@ -402,6 +406,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (flags & RADEON_FLAG_GTT_WC) request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; + if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && + ws->info.drm_minor >= 20) + request.flags |= AMDGPU_GEM_CREATE_LOCAL; r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); if (r) { @@ -435,6 +442,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, bo->u.real.va_handle = va_handle; bo->initial_domain = initial_domain; bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1); + bo->is_local = !!(request.flags & AMDGPU_GEM_CREATE_LOCAL); if (initial_domain & RADEON_DOMAIN_VRAM) ws->allocated_vram += align64(size, ws->info.gart_page_size); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h index 1311344..10b095d 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h @@ -115,6 +115,8 @@ struct amdgpu_winsys_bo { unsigned num_fences; unsigned max_fences; struct pipe_fence_handle **fences; + + bool is_local; }; struct amdgpu_slab { diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 9cadfc4..0b76fba 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1123,6 +1123,8 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) free(handles); mtx_unlock(&ws->global_bo_list_lock); } else { + unsigned num_handles; + if (!amdgpu_add_sparse_backing_buffers(cs)) { r = -ENOMEM; goto bo_list_error; @@ -1142,21 +1144,31 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) } } + num_handles = 0; for (i = 0; i < cs->num_real_buffers; ++i) { struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i]; +if (buffer->bo->is_local) +continue; + assert(buffer->u.real.priority_usage != 0); - cs->handles[i] = buffer->bo->bo; - cs->flags[i] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4; + cs->handles[num_handles] = buffer->bo->bo; + cs->flags[num_handles] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4; +++num_handles; } if (acs->ring_type == RING_GFX) ws->gfx_bo_list_counter += cs->num_real_buffers; - r = amdgpu_bo_list_create(ws->dev, cs->num_real_buffers, -cs->handles, cs->flags, -&cs->request.resources); + if (num_handles) { + r = amdgpu_bo_list_create(ws->dev, num_handles, + cs->handles, cs->flags, + &cs->request.resources); + } else { + r = 0; +cs->request.resources = 0; + } } bo_list_error: -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] radeonsi: set a per-buffer flag that disables inter-process sharing (v4)
From: Marek Olšák For lower overhead in the CS ioctl. Winsys allocators are not used with interprocess-sharable resources. v2: It shouldn't crash anymore, but the kernel will reject the new flag. v3 (christian): Rename the flag, avoid sending those buffers in the BO list. v4 (christian): Remove setting the kernel flag for now --- src/gallium/drivers/radeon/r600_buffer_common.c | 7 ++ src/gallium/drivers/radeon/radeon_winsys.h | 20 + src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 30 ++--- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 27 +- 4 files changed, 56 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index dd1c209..2747ac4 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -167,6 +167,13 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen, RADEON_FLAG_GTT_WC; } + /* Only displayable single-sample textures can be shared between +* processes. */ + if (res->b.b.target == PIPE_BUFFER || + res->b.b.nr_samples >= 2 || + rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY) + res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING; + /* If VRAM is just stolen system memory, allow both VRAM and * GTT, whichever has free space. If a buffer is evicted from * VRAM to GTT, it will stay there. diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index b00b144..f0a0a92 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -54,6 +54,7 @@ enum radeon_bo_flag { /* bitfield */ RADEON_FLAG_NO_CPU_ACCESS = (1 << 1), RADEON_FLAG_NO_SUBALLOC = (1 << 2), RADEON_FLAG_SPARSE =(1 << 3), +RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4), }; enum radeon_bo_usage { /* bitfield */ @@ -661,14 +662,19 @@ static inline unsigned radeon_flags_from_heap(enum radeon_heap heap) { switch (heap) { case RADEON_HEAP_VRAM_NO_CPU_ACCESS: -return RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS; +return RADEON_FLAG_GTT_WC | + RADEON_FLAG_NO_CPU_ACCESS | + RADEON_FLAG_NO_INTERPROCESS_SHARING; + case RADEON_HEAP_VRAM: case RADEON_HEAP_VRAM_GTT: case RADEON_HEAP_GTT_WC: -return RADEON_FLAG_GTT_WC; +return RADEON_FLAG_GTT_WC | + RADEON_FLAG_NO_INTERPROCESS_SHARING; + case RADEON_HEAP_GTT: default: -return 0; +return RADEON_FLAG_NO_INTERPROCESS_SHARING; } } @@ -700,8 +706,14 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, /* NO_CPU_ACCESS implies VRAM only. */ assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM); +/* Resources with interprocess sharing don't use any winsys allocators. */ +if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)) +return -1; + /* Unsupported flags: NO_SUBALLOC, SPARSE. */ -if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS)) +if (flags & ~(RADEON_FLAG_GTT_WC | + RADEON_FLAG_NO_CPU_ACCESS | + RADEON_FLAG_NO_INTERPROCESS_SHARING)) return -1; switch (domain) { diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 97bbe23..08348ed 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -1134,7 +1134,7 @@ amdgpu_bo_create(struct radeon_winsys *rws, { struct amdgpu_winsys *ws = amdgpu_winsys(rws); struct amdgpu_winsys_bo *bo; - unsigned usage = 0, pb_cache_bucket; + unsigned usage = 0, pb_cache_bucket = 0; /* VRAM implies WC. This is not optional. */ assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC); @@ -1189,19 +1189,23 @@ no_slab: size = align64(size, ws->info.gart_page_size); alignment = align(alignment, ws->info.gart_page_size); - int heap = radeon_get_heap_index(domain, flags); - assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); - usage = 1 << heap; /* Only set one usage bit for each heap. */ + bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING; - pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap); - assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets)); + if (use_reusable_pool) { + int heap = radeon_get_heap_index(domain, flags); + assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); + usage = 1 << heap; /* Only set one usage bit for each heap. */ - /* Get a buffer from the cache. */ - bo = (struct amdgpu_winsys_bo*) -pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage, -p
Re: [Mesa-dev] [PATCH 08/10] egl/wayland: remove dri2_surf width/height double init.
On Sunday, 2017-08-27 11:20:33 +0100, Emil Velikov wrote: > From: Emil Velikov > > The dimensions are already set [to 0 or the value provided by the > attributes list] by the _eglInitSurface() call further up. > > The values are updated, as the DRI driver calls the DRI2/IMAGE_LOADER' > get_buffers, shortly before making use of the values. > > Signed-off-by: Emil Velikov > --- > src/egl/drivers/dri2/platform_wayland.c | 3 --- > 1 file changed, 3 deletions(-) > > diff --git a/src/egl/drivers/dri2/platform_wayland.c > b/src/egl/drivers/dri2/platform_wayland.c > index f9554fccde9..89e94e11f4f 100644 > --- a/src/egl/drivers/dri2/platform_wayland.c > +++ b/src/egl/drivers/dri2/platform_wayland.c > @@ -198,9 +198,6 @@ dri2_wl_create_window_surface(_EGLDriver *drv, > _EGLDisplay *disp, > dri2_surf->wl_win->private = dri2_surf; > dri2_surf->wl_win->destroy_window_callback = destroy_window_callback; > > - dri2_surf->base.Width = -1; > - dri2_surf->base.Height = -1; > - We actually have a local patch here that replaces this `-1` initialisation with the dimensions of the wayland `window`: 8< diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 85a11f4..7719407 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -183,8 +183,8 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, dri2_surf->wl_win->private = dri2_surf; dri2_surf->wl_win->destroy_window_callback = destroy_window_callback; - dri2_surf->base.Width = -1; - dri2_surf->base.Height = -1; + dri2_surf->base.Width = window->width; + dri2_surf->base.Height = window->height; config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT, dri2_surf->base.GLColorspace); >8 This apparently fixes dEQP crashes caused by an assert: deRandom.hpp:93: Unknown function: Assertion `min <= max' failed. No dEQP test is mentioned on our patch, but I think this happened on dEQP-EGL.functional.resize.surface_size.* > config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT, > dri2_surf->base.GLColorspace); > > -- > 2.14.0 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] llvmpipe: initialize llvmpipe->dirty with LP_NEW_SCISSOR
On 28 August 2017 at 21:20, Brian Paul wrote: > If llvmpipe_set_scissor_states() is never called, we still need to be sure > that derived scissor/clip state is updated. As of commit 743ad599a97d09b1 > that function might not be called. > > Fixes regressed Piglit gl-1.0-scissor-offscreen -fbo -auto test. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101709 Please add a fixes and/or stable tag when pushing the patch. Fixes: 743ad599a97 ("st/mesa: don't set 16 scissors and 16 viewports if they're unused") Cc: "17.2" Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 101851] [regression] libEGL_common.a undefined reference to '__gxx_personality_v0'
https://bugs.freedesktop.org/show_bug.cgi?id=101851 Emil Velikov changed: What|Removed |Added Resolution|--- |FIXED Status|NEW |RESOLVED --- Comment #10 from Emil Velikov --- Thanks all. Merged as commit 0ac78dc92582a59d4319ebce019b4caa41fb432d Author: Emil Velikov Date: Sat Aug 26 02:37:11 2017 +0100 util: move string_to_uint_map to glsl -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] winsys/amdgpu: add BO to the global list only when RADEON_ALL_BOS is set
Only useful when that debug option is enabled. Signed-off-by: Samuel Pitoiset --- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 20 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 4 +--- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 3 +++ src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 1 + 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 97bbe235a4..1323be8356 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -167,10 +167,12 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf) assert(bo->bo && "must not be called for slab entries"); - mtx_lock(&bo->ws->global_bo_list_lock); - LIST_DEL(&bo->u.real.global_list_item); - bo->ws->num_buffers--; - mtx_unlock(&bo->ws->global_bo_list_lock); + if (bo->ws->debug_all_bos) { + mtx_lock(&bo->ws->global_bo_list_lock); + LIST_DEL(&bo->u.real.global_list_item); + bo->ws->num_buffers--; + mtx_unlock(&bo->ws->global_bo_list_lock); + } amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); amdgpu_va_range_free(bo->u.real.va_handle); @@ -360,10 +362,12 @@ static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo) assert(bo->bo); - mtx_lock(&ws->global_bo_list_lock); - LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list); - ws->num_buffers++; - mtx_unlock(&ws->global_bo_list_lock); + if (ws->debug_all_bos) { + mtx_lock(&ws->global_bo_list_lock); + LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list); + ws->num_buffers++; + mtx_unlock(&ws->global_bo_list_lock); + } } static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 9cadfc4298..5ddde8e794 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -903,8 +903,6 @@ static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs, return cs->num_real_buffers; } -DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false) - static void amdgpu_add_fence_dependency(struct amdgpu_cs *acs, struct amdgpu_cs_buffer *buffer) { @@ -1097,7 +1095,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) /* Create the buffer list. * Use a buffer list containing all allocated buffers if requested. */ - if (debug_get_option_all_bos()) { + if (ws->debug_all_bos) { struct amdgpu_winsys_bo *bo; amdgpu_bo_handle *handles; unsigned num = 0; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index a8b5e5b41a..5e0c1fd8d8 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -50,6 +50,8 @@ static struct util_hash_table *dev_tab = NULL; static mtx_t dev_tab_mutex = _MTX_INITIALIZER_NP; +DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false) + /* Helper function to do the ioctls needed for setup and init. */ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd) { @@ -70,6 +72,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd) } ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL; + ws->debug_all_bos = debug_get_option_all_bos(); return true; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index 7aca612f45..de54470ede 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -79,6 +79,7 @@ struct amdgpu_winsys { ADDR_HANDLE addrlib; bool check_vm; + bool debug_all_bos; /* List of all allocated buffers */ mtx_t global_bo_list_lock; -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 4/5] i965: perf: add support for userspace configurations
On 29 August 2017 at 14:51, Lionel Landwerlin wrote: > On 29/08/17 14:39, Emil Velikov wrote: >> >> Hi Lionel, >> >> On 29 August 2017 at 14:05, Lionel Landwerlin >> wrote: >>> >>> This allows us to deploy new configurations without touching the >>> kernel. >>> >>> v2: Detect loadable configs without creating one (Chris) >>> >>> Signed-off-by: Lionel Landwerlin >>> --- >>> src/mesa/drivers/dri/i965/brw_performance_query.c | 98 >>> ++- >>> 1 file changed, 95 insertions(+), 3 deletions(-) >>> >>> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c >>> b/src/mesa/drivers/dri/i965/brw_performance_query.c >>> index 4b585c95b7d..6ec6b417e52 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c >>> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c >>> @@ -1717,11 +1717,11 @@ read_file_uint64(const char *file, uint64_t *val) >>> >>> fd = open(file, 0); >>> if (fd < 0) >>> - return false; >>> +return false; >>> n = read(fd, buf, sizeof (buf) - 1); >>> close(fd); >>> if (n < 0) >>> - return false; >>> +return false; >>> >>> buf[n] = '\0'; >>> *val = strtoull(buf, NULL, 0); >>> @@ -1807,6 +1807,95 @@ read_sysfs_drm_device_file_uint64(struct >>> brw_context *brw, >>> return read_file_uint64(buf, value); >>> } >>> >>> +static bool >>> +kernel_has_dynamic_config_support(struct brw_context *brw, >>> + const char *sysfs_dev_dir) >>> +{ >>> + __DRIscreen *screen = brw->screen->driScrnPriv; >>> + struct hash_entry *entry; >>> + >>> + hash_table_foreach(brw->perfquery.oa_metrics_table, entry) { >>> + struct brw_perf_query_info *query = entry->data; >>> + char config_path[256]; >>> + uint64_t config_id; >>> + >>> + snprintf(config_path, sizeof(config_path), >>> + "%s/metrics/%s/id", sysfs_dev_dir, query->guid); >>> + >>> + /* Look for the test config, which we know we can't replace. */ >>> + if (read_file_uint64(config_path, &config_id) && config_id == 1) { >>> + uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 }; >> >> The array seems to have a sentinel, yet one provides explicit array >> size (of 1) further down. >> Having a look through i915_drm.h the expectation is not documented. > > > This is the number of pair of (register, value). > >> >> Can I suggest adding a comment [in the kernel i915_drm.h], or dropping >> the sentinel - whichever seems applicable. > > > I guess I'll add a comment in the kernel. > >> >>> + struct drm_i915_perf_oa_config config; >>> + >> >> Unless I failed at math - the struct does not seem compat safe. >> That is sizeof(struct drm_i915_perf_oa_config) varies across 32/64bit >> builds. > > > Hmm... What's not compat safe in there? : > > struct drm_i915_perf_oa_config { > /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ > char uuid[36]; > > __u32 n_mux_regs; > __u32 n_boolean_regs; > __u32 n_flex_regs; > > __u64 mux_regs_ptr; > __u64 boolean_regs_ptr; > __u64 flex_regs_ptr; > }; > Scratch that - I misread 36 as 32, which would have left a 4 byte whole in 64bit builds. Pardon for the noise. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] st/query: init result data with 0
yeah, I already send out a "doing it in the driver" patch for this: https://lists.freedesktop.org/archives/mesa-dev/2017-August/167798.html On Mon, Aug 28, 2017 at 10:22 PM, Marek Olšák wrote: > You can also call util_query_clear_result in the driver. > > Marek > > On Sat, Aug 26, 2017 at 1:15 AM, Karol Herbst wrote: >> otherwise the result might contain random data. >> >> fixes on nvc0: >> * KHR-GL45.pipeline_statistics_query_tests_ARB.functional_default_qo_values >> * >> KHR-GL45.pipeline_statistics_query_tests_ARB.functional_non_rendering_commands_do_not_affect_queries >> >> Signed-off-by: Karol Herbst >> Cc: mesa-sta...@lists.freedesktop.org >> --- >> src/mesa/state_tracker/st_cb_queryobj.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/src/mesa/state_tracker/st_cb_queryobj.c >> b/src/mesa/state_tracker/st_cb_queryobj.c >> index 4c25724b5d..9a65fe7bd9 100644 >> --- a/src/mesa/state_tracker/st_cb_queryobj.c >> +++ b/src/mesa/state_tracker/st_cb_queryobj.c >> @@ -211,7 +211,7 @@ get_query_result(struct pipe_context *pipe, >> struct st_query_object *stq, >> boolean wait) >> { >> - union pipe_query_result data; >> + union pipe_query_result data = { 0 }; >> >> if (!stq->pq) { >>/* Only needed in case we failed to allocate the gallium query >> earlier. >> -- >> 2.14.1 >> >> ___ >> mesa-stable mailing list >> mesa-sta...@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-stable ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 4/5] i965: perf: add support for userspace configurations
On 29/08/17 14:39, Emil Velikov wrote: Hi Lionel, On 29 August 2017 at 14:05, Lionel Landwerlin wrote: This allows us to deploy new configurations without touching the kernel. v2: Detect loadable configs without creating one (Chris) Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_performance_query.c | 98 ++- 1 file changed, 95 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 4b585c95b7d..6ec6b417e52 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -1717,11 +1717,11 @@ read_file_uint64(const char *file, uint64_t *val) fd = open(file, 0); if (fd < 0) - return false; +return false; n = read(fd, buf, sizeof (buf) - 1); close(fd); if (n < 0) - return false; +return false; buf[n] = '\0'; *val = strtoull(buf, NULL, 0); @@ -1807,6 +1807,95 @@ read_sysfs_drm_device_file_uint64(struct brw_context *brw, return read_file_uint64(buf, value); } +static bool +kernel_has_dynamic_config_support(struct brw_context *brw, + const char *sysfs_dev_dir) +{ + __DRIscreen *screen = brw->screen->driScrnPriv; + struct hash_entry *entry; + + hash_table_foreach(brw->perfquery.oa_metrics_table, entry) { + struct brw_perf_query_info *query = entry->data; + char config_path[256]; + uint64_t config_id; + + snprintf(config_path, sizeof(config_path), + "%s/metrics/%s/id", sysfs_dev_dir, query->guid); + + /* Look for the test config, which we know we can't replace. */ + if (read_file_uint64(config_path, &config_id) && config_id == 1) { + uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 }; The array seems to have a sentinel, yet one provides explicit array size (of 1) further down. Having a look through i915_drm.h the expectation is not documented. This is the number of pair of (register, value). Can I suggest adding a comment [in the kernel i915_drm.h], or dropping the sentinel - whichever seems applicable. I guess I'll add a comment in the kernel. + struct drm_i915_perf_oa_config config; + Unless I failed at math - the struct does not seem compat safe. That is sizeof(struct drm_i915_perf_oa_config) varies across 32/64bit builds. Hmm... What's not compat safe in there? : struct drm_i915_perf_oa_config { /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ char uuid[36]; __u32 n_mux_regs; __u32 n_boolean_regs; __u32 n_flex_regs; __u64 mux_regs_ptr; __u64 boolean_regs_ptr; __u64 flex_regs_ptr; }; Some padding should fix it. HTH Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] util: move string_to_uint_map to glsl
On 29 August 2017 at 11:41, Mike Lothian wrote: > Feel free to add my tested by for this > Great, thanks. Pushed to master. Will scoop for 17.2 in a moment. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 4/5] i965: perf: add support for userspace configurations
Hi Lionel, On 29 August 2017 at 14:05, Lionel Landwerlin wrote: > This allows us to deploy new configurations without touching the > kernel. > > v2: Detect loadable configs without creating one (Chris) > > Signed-off-by: Lionel Landwerlin > --- > src/mesa/drivers/dri/i965/brw_performance_query.c | 98 > ++- > 1 file changed, 95 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c > b/src/mesa/drivers/dri/i965/brw_performance_query.c > index 4b585c95b7d..6ec6b417e52 100644 > --- a/src/mesa/drivers/dri/i965/brw_performance_query.c > +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c > @@ -1717,11 +1717,11 @@ read_file_uint64(const char *file, uint64_t *val) > > fd = open(file, 0); > if (fd < 0) > - return false; > +return false; > n = read(fd, buf, sizeof (buf) - 1); > close(fd); > if (n < 0) > - return false; > +return false; > > buf[n] = '\0'; > *val = strtoull(buf, NULL, 0); > @@ -1807,6 +1807,95 @@ read_sysfs_drm_device_file_uint64(struct brw_context > *brw, > return read_file_uint64(buf, value); > } > > +static bool > +kernel_has_dynamic_config_support(struct brw_context *brw, > + const char *sysfs_dev_dir) > +{ > + __DRIscreen *screen = brw->screen->driScrnPriv; > + struct hash_entry *entry; > + > + hash_table_foreach(brw->perfquery.oa_metrics_table, entry) { > + struct brw_perf_query_info *query = entry->data; > + char config_path[256]; > + uint64_t config_id; > + > + snprintf(config_path, sizeof(config_path), > + "%s/metrics/%s/id", sysfs_dev_dir, query->guid); > + > + /* Look for the test config, which we know we can't replace. */ > + if (read_file_uint64(config_path, &config_id) && config_id == 1) { > + uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 }; The array seems to have a sentinel, yet one provides explicit array size (of 1) further down. Having a look through i915_drm.h the expectation is not documented. Can I suggest adding a comment [in the kernel i915_drm.h], or dropping the sentinel - whichever seems applicable. > + struct drm_i915_perf_oa_config config; > + Unless I failed at math - the struct does not seem compat safe. That is sizeof(struct drm_i915_perf_oa_config) varies across 32/64bit builds. Some padding should fix it. HTH Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/19] configure: Add and use AX_CHECK_COMPILE_FLAG
On 28 August 2017 at 22:57, Matt Turner wrote: > -# Flags to help ensure that certain portions of the code -- and only > those > -# portions -- can be built with MSVC: > -# - src/util, src/gallium/auxiliary, rc/gallium/drivers/llvmpipe, and > -# - non-Linux/Posix OpenGL portions needs to build on MSVC 2013 (which > -# supports most of C99) > -# - the rest has no compiler compiler restrictions > -MSVC2013_COMPAT_CFLAGS="-Werror=pointer-arith" > -MSVC2013_COMPAT_CXXFLAGS="-Werror=pointer-arith" > - Can you please preserve the comment about the MSVC2013 flags? With that the series is Reviewed-by: Emil Velikov Mildly curious: Did you pipe the series through ezbench for perf testing? Wondering if the inline and fabsf -> babs changes made any noticeable impact. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa] khronos/egl: remove dependency on Android NDK header
On Friday, 2017-08-25 16:47:35 +0100, Emil Velikov wrote: > On 24 August 2017 at 15:22, Eric Engestrom wrote: > > On Thursday, 2017-08-24 08:54:04 -0500, Rob Herring wrote: > >> On Thu, Aug 24, 2017 at 7:49 AM, Eric Engestrom > >> wrote: > >> > Khronos: https://github.com/KhronosGroup/EGL-Registry/pull/22 > >> > Cc: Rob Herring > >> > Cc: Emil Velikov > >> > Signed-off-by: Eric Engestrom > >> > --- > >> > include/EGL/eglplatform.h | 3 +-- > >> > 1 file changed, 1 insertion(+), 2 deletions(-) > >> > > >> > diff --git a/include/EGL/eglplatform.h b/include/EGL/eglplatform.h > >> > index f045d009c0..bf9ec0bf5f 100644 > >> > --- a/include/EGL/eglplatform.h > >> > +++ b/include/EGL/eglplatform.h > >> > @@ -97,8 +97,7 @@ typedef void *EGLNativeWindowType; > >> > > >> > #elif defined(__ANDROID__) || defined(ANDROID) > >> > > >> > -#include > >> > - > >> > +struct ANativeWindow; > >> > struct egl_native_pixmap_t; > >> > >> How does this work when we need to dereference the struct to call > >> ANativeWindow::dequeueBuffer() and others? > > > > Right, there are two things at play here: > > - eglplatform.h doesn't need to know the struct, so it shouldn't include > > a whole header but simply forward declare for the pointer. > > - platform_android does need it, but wasn't including the proper > > headers, so I missed it in my initial grep. > > > > It seems these two issues are orthogonal after all. This khronos header > > patch should land IMO, but platform_android needs a patch to avoid > > breaking (incoming) and will need the proper libraries that your patch > > 2/2 provides for the O update. > > > Thanks guys, for the correction. > The two "issues" seem the same but not. > > On this patch - Eric, can you please mention in the commit message why > we don't copy the whole file. I pushed it a couple hours before your email, sorry. /me needs to learn not to push stuff right away, let it sit for a bit... As for "why we don't copy the whole file", we essentially do, but we keep our local platforms additions (WL, GBM, X11 on MacOS, and Haiku), right? Or is there something else? > > With that: > Reviewed-by: Emil Velikov > > -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 3/5] i965: perf: list registers to program for queries
Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_context.h | 15 ++ src/mesa/drivers/dri/i965/brw_oa.py | 51 + 2 files changed, 66 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8782de814c7..5508aaf6128 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -603,6 +603,11 @@ enum brw_query_kind { PIPELINE_STATS }; +struct brw_perf_query_register_prog { + uint32_t reg; + uint32_t val; +}; + struct brw_perf_query_info { enum brw_query_kind kind; @@ -622,6 +627,16 @@ struct brw_perf_query_info int a_offset; int b_offset; int c_offset; + + /* Register programming for a given query */ + struct brw_perf_query_register_prog *flex_regs; + uint32_t n_flex_regs; + + struct brw_perf_query_register_prog *mux_regs; + uint32_t n_mux_regs; + + struct brw_perf_query_register_prog *b_counter_regs; + uint32_t n_b_counter_regs; }; /** diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index 8c35923462f..576ea6687fc 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -380,6 +380,45 @@ def output_counter_report(set, counter, current_offset): return current_offset + sizeof(c_type) +register_types = { +'FLEX': 'flex_regs', +'NOA': 'mux_regs', +'OA': 'b_counter_regs', +} + +def compute_register_lengths(set): +register_lengths = {} +register_configs = set.findall('register_config') +for register_config in register_configs: +t = register_types[register_config.get('type')] +if t not in register_lengths: +register_lengths[t] = len(register_config.findall('register')) +else: +register_lengths[t] += len(register_config.findall('register')) + +return register_lengths + + +def generate_register_configs(set): +register_configs = set.findall('register_config') +for register_config in register_configs: +t = register_types[register_config.get('type')] + +availability = register_config.get('availability') +if availability: +output_availability(set, availability, register_config.get('type') + ' register config') +c_indent(3) + +for register in register_config.findall('register'): +c("query->%s[query->n_%s++] = (struct brw_perf_query_register_prog) { .reg = %s, .val = %s };" % + (t, t, register.get('address'), register.get('value'))) + +if availability: +c_outdent(3) +c("}") +c("\n") + + def main(): global c_file global header_file @@ -475,6 +514,12 @@ def main(): max_values[counter.get('symbol_name')] = output_counter_max(set, counter, empty_vars) counter_vars["$" + counter.get('symbol_name')] = counter +c("\n") +register_lengths = compute_register_lengths(set); +for reg_type, reg_length in register_lengths.iteritems(): +c("static struct brw_perf_query_register_prog {0}_{1}_{2}[{3}];".format(chipset, + set.get('underscore_name'), + reg_type, reg_length)) c("\nstatic struct brw_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(chipset, set.get('underscore_name'), len(counters))) c("static struct brw_perf_query_info " + chipset + "_" + set.get('underscore_name') + "_query = {\n") @@ -510,6 +555,10 @@ def main(): .c_offset = 46, """)) +for reg_type, reg_length in register_lengths.iteritems(): +c(".{0} = {1}_{2}_{3},".format(reg_type, chipset, set.get('underscore_name'), reg_type)) +c(".n_{0} = 0, /* Determined at runtime */".format(reg_type)) + c_outdent(3) c("};\n") @@ -528,6 +577,8 @@ def main(): c("\nif (!query->data_size) {") c_indent(3) +generate_register_configs(set) + offset = 0 for counter in counters: offset = output_counter_report(set, counter, offset) -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 5/5] i965: add a debug option to disable oa config loading
This provides a good way to verify we haven't broken using the perf driver on older kernels (which don't have the oa config loading mechanism). Signed-off-by: Lionel Landwerlin --- src/intel/common/gen_debug.c | 1 + src/intel/common/gen_debug.h | 2 +- src/mesa/drivers/dri/i965/brw_performance_query.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c index b604d56ef86..a17d628f577 100644 --- a/src/intel/common/gen_debug.c +++ b/src/intel/common/gen_debug.c @@ -57,6 +57,7 @@ static const struct debug_control debug_control[] = { { "vert",DEBUG_VERTS }, { "dri", DEBUG_DRI }, { "sf", DEBUG_SF }, + { "no-oaconfig", DEBUG_NO_OACONFIG }, { "wm", DEBUG_WM }, { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h index d290303682e..8c8cce4f846 100644 --- a/src/intel/common/gen_debug.h +++ b/src/intel/common/gen_debug.h @@ -57,7 +57,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_VERTS (1ull << 13) #define DEBUG_DRI (1ull << 14) #define DEBUG_SF (1ull << 15) -/* Hole - feel free to reuse (1ull << 16) */ +#define DEBUG_NO_OACONFIG (1ull << 16) #define DEBUG_WM (1ull << 17) #define DEBUG_URB (1ull << 18) #define DEBUG_VS (1ull << 19) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 6ec6b417e52..34e79f398ab 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -2143,7 +2143,8 @@ brw_init_perf_query_info(struct gl_context *ctx) */ oa_register(brw); - if (kernel_has_dynamic_config_support(brw, sysfs_dev_dir)) + if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) && + kernel_has_dynamic_config_support(brw, sysfs_dev_dir)) init_oa_configs(brw, sysfs_dev_dir); else enumerate_sysfs_metrics(brw, sysfs_dev_dir); -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/5] i965: perf: factorize code for availability
Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_oa.py | 28 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index 91f7ecb5731..8c35923462f 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -325,6 +325,21 @@ semantic_type_map = { "ratio": "event" } +def output_availability(set, availability, counter_name): +expression = splice_rpn_expression(set, counter_name, availability) +lines = expression.split(' && ') +n_lines = len(lines) +if n_lines == 1: +c("if (" + lines[0] + ") {") +else: +c("if (" + lines[0] + " &&") +c_indent(4) +for i in range(1, (n_lines - 1)): +c(lines[i] + " &&") +c(lines[(n_lines - 1)] + ") {") +c_outdent(4) + + def output_counter_report(set, counter, current_offset): data_type = counter.get('data_type') data_type_uc = data_type.upper() @@ -343,18 +358,7 @@ def output_counter_report(set, counter, current_offset): availability = counter.get('availability') if availability: -expression = splice_rpn_expression(set, counter, availability) -lines = expression.split(' && ') -n_lines = len(lines) -if n_lines == 1: -c("if (" + lines[0] + ") {") -else: -c("if (" + lines[0] + " &&") -c_indent(4) -for i in range(1, (n_lines - 1)): -c(lines[i] + " &&") -c(lines[(n_lines - 1)] + ") {") -c_outdent(4) +output_availability(set, availability, counter.get('name')) c_indent(3) c("counter = &query->counters[query->n_counters++];\n") -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 4/5] i965: perf: add support for userspace configurations
This allows us to deploy new configurations without touching the kernel. v2: Detect loadable configs without creating one (Chris) Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_performance_query.c | 98 ++- 1 file changed, 95 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 4b585c95b7d..6ec6b417e52 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -1717,11 +1717,11 @@ read_file_uint64(const char *file, uint64_t *val) fd = open(file, 0); if (fd < 0) - return false; +return false; n = read(fd, buf, sizeof (buf) - 1); close(fd); if (n < 0) - return false; +return false; buf[n] = '\0'; *val = strtoull(buf, NULL, 0); @@ -1807,6 +1807,95 @@ read_sysfs_drm_device_file_uint64(struct brw_context *brw, return read_file_uint64(buf, value); } +static bool +kernel_has_dynamic_config_support(struct brw_context *brw, + const char *sysfs_dev_dir) +{ + __DRIscreen *screen = brw->screen->driScrnPriv; + struct hash_entry *entry; + + hash_table_foreach(brw->perfquery.oa_metrics_table, entry) { + struct brw_perf_query_info *query = entry->data; + char config_path[256]; + uint64_t config_id; + + snprintf(config_path, sizeof(config_path), + "%s/metrics/%s/id", sysfs_dev_dir, query->guid); + + /* Look for the test config, which we know we can't replace. */ + if (read_file_uint64(config_path, &config_id) && config_id == 1) { + uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 }; + struct drm_i915_perf_oa_config config; + + memset(&config, 0, sizeof(config)); + + memcpy(config.uuid, query->guid, sizeof(config.uuid)); + + config.n_mux_regs = 1; + config.mux_regs_ptr = (uintptr_t) mux_regs; + + if (ioctl(screen->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) < 0 && + errno == ENOENT) +return true; + + break; + } + } + + return false; +} + +static void +init_oa_configs(struct brw_context *brw, const char *sysfs_dev_dir) +{ + __DRIscreen *screen = brw->screen->driScrnPriv; + struct hash_entry *entry; + + hash_table_foreach(brw->perfquery.oa_metrics_table, entry) { + struct brw_perf_query_info *query = entry->data; + struct drm_i915_perf_oa_config config; + char config_path[256]; + uint64_t config_id; + int ret; + + snprintf(config_path, sizeof(config_path), + "%s/metrics/%s/id", sysfs_dev_dir, query->guid); + + if (read_file_uint64(config_path, &config_id)) { + if (config_id <= 1) +continue; + + ioctl(screen->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id); + } + + memset(&config, 0, sizeof(config)); + + memcpy(config.uuid, query->guid, sizeof(config.uuid)); + + config.n_mux_regs = query->n_mux_regs; + config.mux_regs_ptr = (uintptr_t) query->mux_regs; + + config.n_boolean_regs = query->n_b_counter_regs; + config.boolean_regs_ptr = (uintptr_t) query->b_counter_regs; + + config.n_flex_regs = query->n_flex_regs; + config.flex_regs_ptr = (uintptr_t) query->flex_regs; + + ret = ioctl(screen->fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config); + if (ret < 0) { + DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n", + query->name, query->guid, strerror(errno)); + } else { + struct brw_perf_query_info *registred_query = +append_query_info(brw); + *registred_query = *query; + registred_query->oa_metrics_set_id = ret; + DBG("metric set registred: id = %" PRIu64", guid = %s\n", + registred_query->oa_metrics_set_id, query->guid); + } + } +} + static bool init_oa_sys_vars(struct brw_context *brw, const char *sysfs_dev_dir) { @@ -2054,7 +2143,10 @@ brw_init_perf_query_info(struct gl_context *ctx) */ oa_register(brw); - enumerate_sysfs_metrics(brw, sysfs_dev_dir); + if (kernel_has_dynamic_config_support(brw, sysfs_dev_dir)) + init_oa_configs(brw, sysfs_dev_dir); + else + enumerate_sysfs_metrics(brw, sysfs_dev_dir); } brw->perfquery.unaccumulated = -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 0/5] i965: add support for loadable OA configs
Hi again, Improvement on detection of loadable configs as suggested by Chris. Change is in patch 4. Cheers, Lionel Landwerlin (5): i965: perf: make revision variable available i965: perf: factorize code for availability i965: perf: list registers to program for queries i965: perf: add support for userspace configurations i965: add a debug option to disable oa config loading src/intel/common/gen_debug.c | 1 + src/intel/common/gen_debug.h | 2 +- src/mesa/drivers/dri/i965/brw_context.h | 16 src/mesa/drivers/dri/i965/brw_oa.py | 80 ++--- src/mesa/drivers/dri/i965/brw_performance_query.c | 103 +- src/mesa/drivers/dri/i965/intel_screen.c | 9 +- src/mesa/drivers/dri/i965/intel_screen.h | 3 + 7 files changed, 190 insertions(+), 24 deletions(-) -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/5] i965: perf: make revision variable available
This will be used in the next commit to build up register programming. Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_oa.py | 1 + src/mesa/drivers/dri/i965/brw_performance_query.c | 4 +++- src/mesa/drivers/dri/i965/intel_screen.c | 9 ++--- src/mesa/drivers/dri/i965/intel_screen.h | 3 +++ 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 2274fe5c80e..8782de814c7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1104,6 +1104,7 @@ struct brw_context uint64_t subslice_mask; /** $SubsliceMask */ uint64_t gt_min_freq; /** $GpuMinFrequency */ uint64_t gt_max_freq; /** $GpuMaxFrequency */ + uint64_t revision;/** $SkuRevisionId */ } sys_vars; /* OA metric sets, indexed by GUID, as know by Mesa at build time, diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index 254c512a7da..91f7ecb5731 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -175,6 +175,7 @@ hw_vars["$SubsliceMask"] = "brw->perfquery.sys_vars.subslice_mask" hw_vars["$GpuTimestampFrequency"] = "brw->perfquery.sys_vars.timestamp_frequency" hw_vars["$GpuMinFrequency"] = "brw->perfquery.sys_vars.gt_min_freq" hw_vars["$GpuMaxFrequency"] = "brw->perfquery.sys_vars.gt_max_freq" +hw_vars["$SkuRevisionId"] = "brw->perfquery.sys_vars.revision" def output_rpn_equation_code(set, counter, equation, counter_vars): c("/* RPN equation: " + equation + " */") diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index d8680b48793..4b585c95b7d 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -1812,6 +1812,7 @@ init_oa_sys_vars(struct brw_context *brw, const char *sysfs_dev_dir) { const struct gen_device_info *devinfo = &brw->screen->devinfo; uint64_t min_freq_mhz = 0, max_freq_mhz = 0; + __DRIscreen *screen = brw->screen->driScrnPriv; if (!read_sysfs_drm_device_file_uint64(brw, sysfs_dev_dir, "gt_min_freq_mhz", @@ -1826,6 +1827,8 @@ init_oa_sys_vars(struct brw_context *brw, const char *sysfs_dev_dir) brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 100; brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 100; brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency; + + brw->perfquery.sys_vars.revision = intel_device_get_revision(screen->fd); brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices; /* Assuming uniform distribution of subslices per slices. */ brw->perfquery.sys_vars.n_eu_sub_slices = devinfo->num_subslices[0]; @@ -1848,7 +1851,6 @@ init_oa_sys_vars(struct brw_context *brw, const char *sysfs_dev_dir) } else unreachable("not reached"); } else { - __DRIscreen *screen = brw->screen->driScrnPriv; drm_i915_getparam_t gp; int ret; int slice_mask = 0; diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index f98475997d3..3d617f99436 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -2052,14 +2052,9 @@ set_max_gl_versions(struct intel_screen *screen) /** * Return the revision (generally the revid field of the PCI header) of the * graphics device. - * - * XXX: This function is useful to keep around even if it is not currently in - * use. It is necessary for new platforms and revision specific workarounds or - * features. Please don't remove it so that we know it at least continues to - * build. */ -static __attribute__((__unused__)) int -brw_get_revision(int fd) +int +intel_device_get_revision(int fd) { struct drm_i915_getparam gp; int revision; diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index 41e1dbdd4e9..7948617b7f0 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -136,6 +136,9 @@ double get_time(void); const int* intel_supported_msaa_modes(const struct intel_screen *screen); +int +intel_device_get_revision(int fd); + static inline bool can_do_pipelined_register_writes(const struct intel_screen *screen) { -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Question about implementing viewport transfer and const load in nir
On Sat, Aug 26, 2017 at 9:40 AM, Qiang Yu wrote: > Hi guys, > > When working on lima gp compiler, I come across two problems about > inserting extra uniform > and instructions in nir for the driver and don't know where's the > right place to do it. So I'd like > to hear your opinion and if there's other driver already did so. > > 1. viewport transfer > lima gp needs embed viewport transfer into vertex shader, so need to > insert a uniform which > holds the scale and transfer and some instruction to apply the > calculation to the gl_Position > varying output. If do this in driver callback create_vs_state(), seems > won't affect the state > tracker to allocate uniform space for it, maybe add something in > st_create_vp_variant()? > > 2. const load > lima gp needs const be loaded just as uniform, so I have to allocate > uniform space for them. > Besides the same problem as 1 (where to do it), the const node may be > eliminated after driver > nir optimization and won't have a base filed as uniform. > > Seems some of these (space allocation) need be done in non-driver > layer and some (instruction > insertion and uniform base assign to const) can be done in driver. > > BTW. lima gp can only have one uniform buffer, so I can't just use a > dedicated uniform buffer > for viewport transfer and const uniform. > btw, does lima have some way to write to memory from cmdstream (ie. without setting up a full blown draw)? If so perhaps you could get away with leaving some extra space at the end of your uniform buffer that you copy driver internal uniforms into before kicking the draw? fwiw, freedreno does have driver specific uniforms, see ir3_driver_param. Although normal uniforms (ie. non-UBO) get copied into internal memory, so I just upload driver specific uniforms (as needed) and immediates to the tail of the uniform memory before the draw. BR, -R ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102466] gallium/auxiliary/util/u_cpu_detect.c: 2 * bad condition ?
https://bugs.freedesktop.org/show_bug.cgi?id=102466 --- Comment #3 from Emil Velikov --- (In reply to dcb314 from comment #2) > I think you missed the point. Any expression ANDed with 6 will be != 6. > How about when xgetbv() returns say 6 or 7 (amongst others)? Also keep in mind that the file depends on multiple defines. Based on a _very_ old play with Mesa + cppcheck, the latter was tripping quite often, producing false positives. The OS/compiler request does not sound that unreasonable ;-) -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102466] gallium/auxiliary/util/u_cpu_detect.c: 2 * bad condition ?
https://bugs.freedesktop.org/show_bug.cgi?id=102466 --- Comment #2 from dcb...@hotmail.com --- (In reply to Gert Wollny from comment #1) > It would be helpful to add on which OS and with which compiler and for what > architecture you were compiling mesa, because depending on the build > environment xgetbv() may simply return 0 as a fallback (see lines 259-277 in > the same file). I think you missed the point. Any expression ANDed with 6 will be != 6. OS, compiler and arch independent. For the record, Linux, gcc and x86_64, although I used a static analyser called cppcheck to find this bug. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/5] i965: perf: add support for userspace configurations
On 29/08/17 12:15, Chris Wilson wrote: Quoting Lionel Landwerlin (2017-08-29 11:58:57) This allows us to deploy new configurations without touching the kernel. Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_performance_query.c | 97 ++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 4b585c95b7d..901cbb464e9 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -1807,6 +1807,96 @@ read_sysfs_drm_device_file_uint64(struct brw_context *brw, return read_file_uint64(buf, value); } +static bool +kernel_has_dynamic_config_support(int drm_fd, const char *sysfs_dev_dir) +{ +struct drm_i915_perf_oa_config config; +const char *uuid = "01234567-0123-0123-0123-0123456789ab"; +uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 }; +char config_path[256]; +struct stat sb; +uint64_t config_id; +int ret; + +snprintf(config_path, sizeof(config_path), + "%s/metrics/%s/id", sysfs_dev_dir, uuid); + +if (stat(config_path, &sb) == 0) { + if (!read_file_uint64(config_path, &config_id)) + return false; + + if (ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) == 0) + return true; So you acknowledge the race condition with another process also doing the same test, or worse if they genuinely used that uuid for an actual config. Thanks, I should randomize that. +} + +memset(&config, 0, sizeof(config)); +memcpy(config.uuid, uuid, sizeof(config.uuid)); + +config.n_mux_regs = 1; +config.mux_regs_ptr = (uintptr_t) mux_regs; + +/* Create a new config */ +ret = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config); +if (ret < 0) +return false; A trick you may like to try is to create an invalid config that generates something other than EINVAL so that you can then distinguish with the non-existent ioctl cmd error. Since you reserved id=0 for invalid, a good choice would be static bool has_dyn_config() { uint64_t config_id = 0; return sys_ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) == -ENOENT; } Thanks! -Chris ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/5] i965: perf: add support for userspace configurations
Quoting Lionel Landwerlin (2017-08-29 11:58:57) > This allows us to deploy new configurations without touching the > kernel. > > Signed-off-by: Lionel Landwerlin > --- > src/mesa/drivers/dri/i965/brw_performance_query.c | 97 > ++- > 1 file changed, 96 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c > b/src/mesa/drivers/dri/i965/brw_performance_query.c > index 4b585c95b7d..901cbb464e9 100644 > --- a/src/mesa/drivers/dri/i965/brw_performance_query.c > +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c > @@ -1807,6 +1807,96 @@ read_sysfs_drm_device_file_uint64(struct brw_context > *brw, > return read_file_uint64(buf, value); > } > > +static bool > +kernel_has_dynamic_config_support(int drm_fd, const char *sysfs_dev_dir) > +{ > +struct drm_i915_perf_oa_config config; > +const char *uuid = "01234567-0123-0123-0123-0123456789ab"; > +uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 }; > +char config_path[256]; > +struct stat sb; > +uint64_t config_id; > +int ret; > + > +snprintf(config_path, sizeof(config_path), > + "%s/metrics/%s/id", sysfs_dev_dir, uuid); > + > +if (stat(config_path, &sb) == 0) { > + if (!read_file_uint64(config_path, &config_id)) > + return false; > + > + if (ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) == 0) > + return true; So you acknowledge the race condition with another process also doing the same test, or worse if they genuinely used that uuid for an actual config. > +} > + > +memset(&config, 0, sizeof(config)); > +memcpy(config.uuid, uuid, sizeof(config.uuid)); > + > +config.n_mux_regs = 1; > +config.mux_regs_ptr = (uintptr_t) mux_regs; > + > +/* Create a new config */ > +ret = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config); > +if (ret < 0) > +return false; A trick you may like to try is to create an invalid config that generates something other than EINVAL so that you can then distinguish with the non-existent ioctl cmd error. Since you reserved id=0 for invalid, a good choice would be static bool has_dyn_config() { uint64_t config_id = 0; return sys_ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) == -ENOENT; } -Chris ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/10] egl: mixed bag of fixes/cleanups
[Dropping Neil's email, as it bounces] On 29 August 2017 at 11:43, Daniel Stone wrote: egl: allow RGB565 formats in eglCreateWaylandBufferFromImageWL >>> >>> I've been avoiding trying to expand use of this API, in all honesty. >>> >> Any particular reason? >> >> Quick search on the webs + my system shows no users of the extension, >> apart from weston-nested. >> Even there it's not a hard requirement. >> >> Any ideas if the extension was ever used [elsewhere], should we >> consider deprecating it? > > I haven't seen much use of it, myself. It's also only really useful > for wl_drm; I'd prefer to push zwp_linux_dmabuf instead. It also > relies on a more perfect proxying of wl_drm than we necessarily > actually implement. > Right. So I guess as weston-nested is updated to use wl_dmabuf we could sketch an evil plan ;-) egl: add juicy comment about WL_bind_wayland_display + wl_drm/wl_dmabuf >>> >>> It depends on a couple of things, really. Firstly, on Mesa's general >>> feature deprecation schedule. Secondly, on external support >>> (Weston/Mutter/Enlightenment are fine, no support yet in >>> KWin/WLC/Smithay/...). Thirdly, on how cleanly we can separate it. If >>> we can break out all the BindWaylandDisplay code more than we already >>> have, it might be easier to maintain. And lastly, on NVIDIA: I believe >>> being a Streams provider requires BindWaylandDisplay, so we might need >>> that to be replaced before we can rip it out of Mesa. >>> >>> Having the comment expanded to cover these might be nice. >> >> Fully agree on all points, although I think the priority is in the opposite >> way. >> But above all does the extension make sense, or even yet, can it be >> implemented with wl_dmabuf? > > The one thing it provides over and above wl_dmabuf is device > advertisement, to guide the implementation towards a particular GPU. > So maybe it makes sense to keep it just for that, purely for render > nodes. > Ack, so it'll be a fairly trivial implementation. > Also VA-API still uses wl_drm, so we need to wean that away from it first. > Right, that's why I flipped the order ;-) Although they are considering breaking the ABI with the v2.0 branch. For which I have some sweet patches - purge wl_drm, forbid use of card nodes... Need to finish that one of the of these days. egl/wayland: group wl_win specific code together egl/wayland: make sure HAS_$FORMAT is set for wl_dmabuf >>> >>> Reviewed-by: Daniel Stone >>> >> As above - I'm guessing the r-b stands for both patches? > > Yep! > Ack. Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102466] gallium/auxiliary/util/u_cpu_detect.c: 2 * bad condition ?
https://bugs.freedesktop.org/show_bug.cgi?id=102466 --- Comment #1 from Gert Wollny --- It would be helpful to add on which OS and with which compiler and for what architecture you were compiling mesa, because depending on the build environment xgetbv() may simply return 0 as a fallback (see lines 259-277 in the same file). -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] i965: add a debug option to disable oa config loading
This provides a good way to verify we haven't broken using the perf driver on older kernels (which don't have the oa config loading mechanism). Signed-off-by: Lionel Landwerlin --- src/intel/common/gen_debug.c | 1 + src/intel/common/gen_debug.h | 2 +- src/mesa/drivers/dri/i965/brw_performance_query.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c index b604d56ef86..a17d628f577 100644 --- a/src/intel/common/gen_debug.c +++ b/src/intel/common/gen_debug.c @@ -57,6 +57,7 @@ static const struct debug_control debug_control[] = { { "vert",DEBUG_VERTS }, { "dri", DEBUG_DRI }, { "sf", DEBUG_SF }, + { "no-oaconfig", DEBUG_NO_OACONFIG }, { "wm", DEBUG_WM }, { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h index d290303682e..8c8cce4f846 100644 --- a/src/intel/common/gen_debug.h +++ b/src/intel/common/gen_debug.h @@ -57,7 +57,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_VERTS (1ull << 13) #define DEBUG_DRI (1ull << 14) #define DEBUG_SF (1ull << 15) -/* Hole - feel free to reuse (1ull << 16) */ +#define DEBUG_NO_OACONFIG (1ull << 16) #define DEBUG_WM (1ull << 17) #define DEBUG_URB (1ull << 18) #define DEBUG_VS (1ull << 19) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 901cbb464e9..16e356e70b2 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -2146,7 +2146,8 @@ brw_init_perf_query_info(struct gl_context *ctx) */ oa_register(brw); - if (kernel_has_dynamic_config_support(screen->fd, sysfs_dev_dir)) + if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) && + kernel_has_dynamic_config_support(screen->fd, sysfs_dev_dir)) init_oa_configs(brw, sysfs_dev_dir); else enumerate_sysfs_metrics(brw, sysfs_dev_dir); -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] i965: perf: list registers to program for queries
Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_context.h | 15 ++ src/mesa/drivers/dri/i965/brw_oa.py | 51 + 2 files changed, 66 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8782de814c7..5508aaf6128 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -603,6 +603,11 @@ enum brw_query_kind { PIPELINE_STATS }; +struct brw_perf_query_register_prog { + uint32_t reg; + uint32_t val; +}; + struct brw_perf_query_info { enum brw_query_kind kind; @@ -622,6 +627,16 @@ struct brw_perf_query_info int a_offset; int b_offset; int c_offset; + + /* Register programming for a given query */ + struct brw_perf_query_register_prog *flex_regs; + uint32_t n_flex_regs; + + struct brw_perf_query_register_prog *mux_regs; + uint32_t n_mux_regs; + + struct brw_perf_query_register_prog *b_counter_regs; + uint32_t n_b_counter_regs; }; /** diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index 8c35923462f..576ea6687fc 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -380,6 +380,45 @@ def output_counter_report(set, counter, current_offset): return current_offset + sizeof(c_type) +register_types = { +'FLEX': 'flex_regs', +'NOA': 'mux_regs', +'OA': 'b_counter_regs', +} + +def compute_register_lengths(set): +register_lengths = {} +register_configs = set.findall('register_config') +for register_config in register_configs: +t = register_types[register_config.get('type')] +if t not in register_lengths: +register_lengths[t] = len(register_config.findall('register')) +else: +register_lengths[t] += len(register_config.findall('register')) + +return register_lengths + + +def generate_register_configs(set): +register_configs = set.findall('register_config') +for register_config in register_configs: +t = register_types[register_config.get('type')] + +availability = register_config.get('availability') +if availability: +output_availability(set, availability, register_config.get('type') + ' register config') +c_indent(3) + +for register in register_config.findall('register'): +c("query->%s[query->n_%s++] = (struct brw_perf_query_register_prog) { .reg = %s, .val = %s };" % + (t, t, register.get('address'), register.get('value'))) + +if availability: +c_outdent(3) +c("}") +c("\n") + + def main(): global c_file global header_file @@ -475,6 +514,12 @@ def main(): max_values[counter.get('symbol_name')] = output_counter_max(set, counter, empty_vars) counter_vars["$" + counter.get('symbol_name')] = counter +c("\n") +register_lengths = compute_register_lengths(set); +for reg_type, reg_length in register_lengths.iteritems(): +c("static struct brw_perf_query_register_prog {0}_{1}_{2}[{3}];".format(chipset, + set.get('underscore_name'), + reg_type, reg_length)) c("\nstatic struct brw_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(chipset, set.get('underscore_name'), len(counters))) c("static struct brw_perf_query_info " + chipset + "_" + set.get('underscore_name') + "_query = {\n") @@ -510,6 +555,10 @@ def main(): .c_offset = 46, """)) +for reg_type, reg_length in register_lengths.iteritems(): +c(".{0} = {1}_{2}_{3},".format(reg_type, chipset, set.get('underscore_name'), reg_type)) +c(".n_{0} = 0, /* Determined at runtime */".format(reg_type)) + c_outdent(3) c("};\n") @@ -528,6 +577,8 @@ def main(): c("\nif (!query->data_size) {") c_indent(3) +generate_register_configs(set) + offset = 0 for counter in counters: offset = output_counter_report(set, counter, offset) -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] i965: perf: add support for userspace configurations
This allows us to deploy new configurations without touching the kernel. Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_performance_query.c | 97 ++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 4b585c95b7d..901cbb464e9 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -1807,6 +1807,96 @@ read_sysfs_drm_device_file_uint64(struct brw_context *brw, return read_file_uint64(buf, value); } +static bool +kernel_has_dynamic_config_support(int drm_fd, const char *sysfs_dev_dir) +{ +struct drm_i915_perf_oa_config config; +const char *uuid = "01234567-0123-0123-0123-0123456789ab"; +uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 }; +char config_path[256]; +struct stat sb; +uint64_t config_id; +int ret; + +snprintf(config_path, sizeof(config_path), + "%s/metrics/%s/id", sysfs_dev_dir, uuid); + +if (stat(config_path, &sb) == 0) { + if (!read_file_uint64(config_path, &config_id)) + return false; + + if (ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) == 0) + return true; +} + +memset(&config, 0, sizeof(config)); +memcpy(config.uuid, uuid, sizeof(config.uuid)); + +config.n_mux_regs = 1; +config.mux_regs_ptr = (uintptr_t) mux_regs; + +/* Create a new config */ +ret = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config); +if (ret < 0) +return false; + +config_id = ret; +ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id); + +return true; +} + +static void +init_oa_configs(struct brw_context *brw, const char *sysfs_dev_dir) +{ + __DRIscreen *screen = brw->screen->driScrnPriv; + struct hash_entry *entry; + + hash_table_foreach(brw->perfquery.oa_metrics_table, entry) { + struct brw_perf_query_info *query = entry->data; + struct drm_i915_perf_oa_config config; + char config_path[256]; + uint64_t config_id; + int ret; + + snprintf(config_path, sizeof(config_path), + "%s/metrics/%s/id", sysfs_dev_dir, query->guid); + + if (read_file_uint64(config_path, &config_id)) { + if (config_id <= 1) +continue; + + ioctl(screen->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id); + } + + memset(&config, 0, sizeof(config)); + + memcpy(config.uuid, query->guid, sizeof(config.uuid)); + + config.n_mux_regs = query->n_mux_regs; + config.mux_regs_ptr = (uintptr_t) query->mux_regs; + + config.n_boolean_regs = query->n_b_counter_regs; + config.boolean_regs_ptr = (uintptr_t) query->b_counter_regs; + + config.n_flex_regs = query->n_flex_regs; + config.flex_regs_ptr = (uintptr_t) query->flex_regs; + + ret = ioctl(screen->fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config); + if (ret < 0) { + DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n", + query->name, query->guid, strerror(errno)); + } else { + struct brw_perf_query_info *registred_query = +append_query_info(brw); + *registred_query = *query; + registred_query->oa_metrics_set_id = ret; + DBG("metric set registred: id = %" PRIu64", guid = %s\n", + registred_query->oa_metrics_set_id, query->guid); + } + } +} + static bool init_oa_sys_vars(struct brw_context *brw, const char *sysfs_dev_dir) { @@ -2045,6 +2135,8 @@ brw_init_perf_query_info(struct gl_context *ctx) get_sysfs_dev_dir(brw, sysfs_dev_dir, sizeof(sysfs_dev_dir)) && init_oa_sys_vars(brw, sysfs_dev_dir)) { + __DRIscreen *screen = brw->screen->driScrnPriv; + brw->perfquery.oa_metrics_table = _mesa_hash_table_create(NULL, _mesa_key_hash_string, _mesa_key_string_equal); @@ -2054,7 +2146,10 @@ brw_init_perf_query_info(struct gl_context *ctx) */ oa_register(brw); - enumerate_sysfs_metrics(brw, sysfs_dev_dir); + if (kernel_has_dynamic_config_support(screen->fd, sysfs_dev_dir)) + init_oa_configs(brw, sysfs_dev_dir); + else + enumerate_sysfs_metrics(brw, sysfs_dev_dir); } brw->perfquery.unaccumulated = -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] i965: perf: make revision variable available
This will be used in the next commit to build up register programming. Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_oa.py | 1 + src/mesa/drivers/dri/i965/brw_performance_query.c | 4 +++- src/mesa/drivers/dri/i965/intel_screen.c | 9 ++--- src/mesa/drivers/dri/i965/intel_screen.h | 3 +++ 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 2274fe5c80e..8782de814c7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1104,6 +1104,7 @@ struct brw_context uint64_t subslice_mask; /** $SubsliceMask */ uint64_t gt_min_freq; /** $GpuMinFrequency */ uint64_t gt_max_freq; /** $GpuMaxFrequency */ + uint64_t revision;/** $SkuRevisionId */ } sys_vars; /* OA metric sets, indexed by GUID, as know by Mesa at build time, diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index 254c512a7da..91f7ecb5731 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -175,6 +175,7 @@ hw_vars["$SubsliceMask"] = "brw->perfquery.sys_vars.subslice_mask" hw_vars["$GpuTimestampFrequency"] = "brw->perfquery.sys_vars.timestamp_frequency" hw_vars["$GpuMinFrequency"] = "brw->perfquery.sys_vars.gt_min_freq" hw_vars["$GpuMaxFrequency"] = "brw->perfquery.sys_vars.gt_max_freq" +hw_vars["$SkuRevisionId"] = "brw->perfquery.sys_vars.revision" def output_rpn_equation_code(set, counter, equation, counter_vars): c("/* RPN equation: " + equation + " */") diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index d8680b48793..4b585c95b7d 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -1812,6 +1812,7 @@ init_oa_sys_vars(struct brw_context *brw, const char *sysfs_dev_dir) { const struct gen_device_info *devinfo = &brw->screen->devinfo; uint64_t min_freq_mhz = 0, max_freq_mhz = 0; + __DRIscreen *screen = brw->screen->driScrnPriv; if (!read_sysfs_drm_device_file_uint64(brw, sysfs_dev_dir, "gt_min_freq_mhz", @@ -1826,6 +1827,8 @@ init_oa_sys_vars(struct brw_context *brw, const char *sysfs_dev_dir) brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 100; brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 100; brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency; + + brw->perfquery.sys_vars.revision = intel_device_get_revision(screen->fd); brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices; /* Assuming uniform distribution of subslices per slices. */ brw->perfquery.sys_vars.n_eu_sub_slices = devinfo->num_subslices[0]; @@ -1848,7 +1851,6 @@ init_oa_sys_vars(struct brw_context *brw, const char *sysfs_dev_dir) } else unreachable("not reached"); } else { - __DRIscreen *screen = brw->screen->driScrnPriv; drm_i915_getparam_t gp; int ret; int slice_mask = 0; diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index f98475997d3..3d617f99436 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -2052,14 +2052,9 @@ set_max_gl_versions(struct intel_screen *screen) /** * Return the revision (generally the revid field of the PCI header) of the * graphics device. - * - * XXX: This function is useful to keep around even if it is not currently in - * use. It is necessary for new platforms and revision specific workarounds or - * features. Please don't remove it so that we know it at least continues to - * build. */ -static __attribute__((__unused__)) int -brw_get_revision(int fd) +int +intel_device_get_revision(int fd) { struct drm_i915_getparam gp; int revision; diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index 41e1dbdd4e9..7948617b7f0 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -136,6 +136,9 @@ double get_time(void); const int* intel_supported_msaa_modes(const struct intel_screen *screen); +int +intel_device_get_revision(int fd); + static inline bool can_do_pipelined_register_writes(const struct intel_screen *screen) { -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] i965: perf: factorize code for availability
Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_oa.py | 28 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index 91f7ecb5731..8c35923462f 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -325,6 +325,21 @@ semantic_type_map = { "ratio": "event" } +def output_availability(set, availability, counter_name): +expression = splice_rpn_expression(set, counter_name, availability) +lines = expression.split(' && ') +n_lines = len(lines) +if n_lines == 1: +c("if (" + lines[0] + ") {") +else: +c("if (" + lines[0] + " &&") +c_indent(4) +for i in range(1, (n_lines - 1)): +c(lines[i] + " &&") +c(lines[(n_lines - 1)] + ") {") +c_outdent(4) + + def output_counter_report(set, counter, current_offset): data_type = counter.get('data_type') data_type_uc = data_type.upper() @@ -343,18 +358,7 @@ def output_counter_report(set, counter, current_offset): availability = counter.get('availability') if availability: -expression = splice_rpn_expression(set, counter, availability) -lines = expression.split(' && ') -n_lines = len(lines) -if n_lines == 1: -c("if (" + lines[0] + ") {") -else: -c("if (" + lines[0] + " &&") -c_indent(4) -for i in range(1, (n_lines - 1)): -c(lines[i] + " &&") -c(lines[(n_lines - 1)] + ") {") -c_outdent(4) +output_availability(set, availability, counter.get('name')) c_indent(3) c("counter = &query->counters[query->n_counters++];\n") -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/5] i965: add support for loadable OA configs
Hi, Some recent work in the i915 kernel driver enables us to load OA configs from userspace, so new configs can be deployed without having to modify the kernel. This series makes use of this feature in i965 if available. Cheers, Lionel Landwerlin (5): i965: perf: make revision variable available i965: perf: factorize code for availability i965: perf: list registers to program for queries i965: perf: add support for userspace configurations i965: add a debug option to disable oa config loading src/intel/common/gen_debug.c | 1 + src/intel/common/gen_debug.h | 2 +- src/mesa/drivers/dri/i965/brw_context.h | 16 src/mesa/drivers/dri/i965/brw_oa.py | 80 ++--- src/mesa/drivers/dri/i965/brw_performance_query.c | 102 +- src/mesa/drivers/dri/i965/intel_screen.c | 9 +- src/mesa/drivers/dri/i965/intel_screen.h | 3 + 7 files changed, 191 insertions(+), 22 deletions(-) -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev