[Mesa-dev] [PATCH 11/15] i965/vec4: add support for packing tcs outputs
Reviewed-by: Edward O'Callaghan --- src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 7 +++ 1 file changed, 7 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 8bd150a..4bc3be7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -406,6 +406,13 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } } + unsigned first_component = nir_intrinsic_component(instr); + if (first_component) { + assert(swiz == BRW_SWIZZLE_XYZW); + swiz = BRW_SWZ_COMP_OUTPUT(first_component); + mask = mask << first_component; + } + emit_urb_write(swizzle(value, swiz), mask, imm_offset, indirect_offset); break; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/15] i965/vec4: add support for packing vs/gs/tes outputs
Here we create a new output_generic_reg array with the ability to store the dst_reg for each component of user defined varyings. This is needed as the previous code only stored the dst_reg based on the varying location which meant packed varyings would overwrite each other. --- src/mesa/drivers/dri/i965/brw_vec4.h | 3 +++ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 9 ++- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 37 +++--- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 3043147..4236b51 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -114,6 +114,8 @@ public: * for the ir->location's used. */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; + dst_reg output_generic_reg[MAX_VARYINGS_INCL_PATCH][4]; + unsigned output_generic_num_components[MAX_VARYINGS_INCL_PATCH][4]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; int uniforms; @@ -270,6 +272,7 @@ public: void emit_ndc_computation(); void emit_psiz_and_flags(dst_reg reg); vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying); + void emit_generic_urb_slot(dst_reg reg, int varying, int component); virtual void emit_urb_slot(dst_reg reg, int varying); void emit_shader_time_begin(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 33ad852..e5a091d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -416,7 +416,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, instr->num_components); - output_reg[varying] = dst_reg(src); + if (varying >= VARYING_SLOT_VAR0) { + unsigned c = nir_intrinsic_component(instr); + unsigned v = varying - VARYING_SLOT_VAR0; + output_generic_reg[v][c] = dst_reg(src); + output_generic_num_components[v][c] = instr->num_components; + } else { + 
output_reg[varying] = dst_reg(src); + } break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 652b453..e6eea69 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1279,13 +1279,35 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying) assert(varying < VARYING_SLOT_MAX); assert(output_reg[varying].type == reg.type); current_annotation = output_reg_annotation[varying]; - if (output_reg[varying].file != BAD_FILE) + if (output_reg[varying].file != BAD_FILE) { return emit(MOV(reg, src_reg(output_reg[varying]))); - else + } else return NULL; } void +vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component) +{ + assert(varying < VARYING_SLOT_MAX); + assert(varying >= VARYING_SLOT_VAR0); + varying = varying - VARYING_SLOT_VAR0; + + unsigned num_comps = output_generic_num_components[varying][component]; + if (num_comps == 0) + return; + + assert(output_generic_reg[varying][component].type == reg.type); + current_annotation = output_reg_annotation[varying]; + if (output_generic_reg[varying][component].file != BAD_FILE) { + src_reg src = src_reg(output_generic_reg[varying][component]); + src.swizzle = BRW_SWZ_COMP_OUTPUT(component); + reg.writemask = + brw_writemask_for_component_packing(num_comps, component); + emit(MOV(reg, src)); + } +} + +void vec4_visitor::emit_urb_slot(dst_reg reg, int varying) { reg.type = BRW_REGISTER_TYPE_F; @@ -1324,7 +1346,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying) /* No need to write to this slot */ break; default: - emit_generic_urb_slot(reg, varying); + if (varying >= VARYING_SLOT_VAR0) { + for (int i = 0; i < 4; i++) { +emit_generic_urb_slot(reg, varying, i); + } + } else { + emit_generic_urb_slot(reg, varying); + } break; } } @@ -1772,6 +1800,9 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, this->current_annotation = NULL; 
memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation)); + memset(this->output_generic_num_components, 0, + sizeof(this->output_generic_num_components)); + this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; this->live_intervals = NULL; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/15] i965: add helpers for creating component layout swizzle
This will be used to swizzle components to the beginning or end of the vector based on the component layout qualifier and whether we are doing a load or store. Reviewed-by: Edward O'Callaghan --- src/mesa/drivers/dri/i965/brw_reg.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 38cf8e3..7eab7b5 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -88,6 +88,9 @@ struct brw_device_info; #define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3) #define BRW_SWIZZLE_WZYX BRW_SWIZZLE4(3,2,1,0) +#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2)) +#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2)) + static inline bool brw_is_single_value_swizzle(unsigned swiz) { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/15] nir: add doubles component packing support
This makes sure we give the correct driver location for doubles when using component packing. --- src/compiler/nir/nir_lower_io.c | 16 1 file changed, 16 insertions(+) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index e480264..7a72e69 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -75,6 +75,22 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size, if (locations[idx][var->data.index] == -1) { var->data.driver_location = location; locations[idx][var->data.index] = location; + +/* A dvec3 can be packed with a double we need special handling + * for this as we are packing across two locations. + */ +if (glsl_get_base_type(var->type) == GLSL_TYPE_DOUBLE && +glsl_get_vector_elements(var->type) == 3) { + /* Hack around type_size functions that expect vectors to be +* padded out to vec4. +*/ + unsigned dsize = type_size(glsl_double_type()); + unsigned offset = + dsize == type_size(glsl_float_type()) ? dsize : dsize * 2; + + locations[idx + 1][var->data.index] = location + offset; +} + location += type_size(var->type); } else { var->data.driver_location = locations[idx][var->data.index]; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/15] i965: enable ARB_enhanced_layouts for gen6+
Reviewed-by: Edward O'Callaghan --- src/mesa/drivers/dri/i965/intel_extensions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index c557137..ec89094 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -294,6 +294,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_conditional_render_inverted = true; ctx->Extensions.ARB_cull_distance = true; ctx->Extensions.ARB_draw_buffers_blend = true; + ctx->Extensions.ARB_enhanced_layouts = true; ctx->Extensions.ARB_ES3_compatibility = true; ctx->Extensions.ARB_fragment_layer_viewport = true; ctx->Extensions.ARB_sample_shading = true; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/15] i965: add component packing support for load_output intrinsics
--- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 38 +++- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 395594f..e75e7f7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2481,6 +2481,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, case nir_intrinsic_load_per_vertex_output: { fs_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; + unsigned first_component = nir_intrinsic_component(instr); fs_inst *inst; if (indirect_offset.file == BAD_FILE) { @@ -2561,10 +2562,24 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, } bld.LOAD_PAYLOAD(dst, srcs, num_components, 0); } else { -inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle); +if (first_component != 0) { + unsigned read_components = + instr->num_components + first_component; + fs_reg tmp = bld.vgrf(dst.type, read_components); + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, + patch_handle); + inst->regs_written = read_components; + for (unsigned i = 0; i < instr->num_components; i++) { + bld.MOV(offset(dst, bld, i), + offset(tmp, bld, i + first_component)); + } +} else { + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, + patch_handle); + inst->regs_written = instr->num_components; +} inst->offset = imm_offset; inst->mlen = 1; -inst->regs_written = instr->num_components; } } else { /* Indirect indexing - use per-slot offsets as well. 
*/ @@ -2574,11 +2589,24 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, }; fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); - - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload); + if (first_component != 0) { +unsigned read_components = + instr->num_components + first_component; +fs_reg tmp = bld.vgrf(dst.type, read_components); +inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, +payload); +inst->regs_written = read_components; +for (unsigned i = 0; i < instr->num_components; i++) { + bld.MOV(offset(dst, bld, i), + offset(tmp, bld, i + first_component)); +} + } else { +inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, +payload); +inst->regs_written = instr->num_components; + } inst->offset = imm_offset; inst->mlen = 2; - inst->regs_written = instr->num_components; } break; } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 15/15] docs: mark ARB_enhanced_layouts as DONE for i965
Reviewed-by: Edward O'Callaghan --- docs/GL3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 1335397..ebaf4bf 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -193,11 +193,11 @@ GL 4.4, GLSL 4.40: GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers) GL_ARB_buffer_storage DONE (i965, nv50, nvc0, r600, radeonsi) GL_ARB_clear_texture DONE (i965, nv50, nvc0) - GL_ARB_enhanced_layouts in progress (Timothy) + GL_ARB_enhanced_layouts DONE (i965) - compile-time constant expressions DONE - explicit byte offsets for blocksDONE - forced alignment within blocks DONE - - specified vec4-slot component numbers in progress + - specified vec4-slot component numbers DONE (i965) - specified transform/feedback layout DONE - input/output block locationsDONE GL_ARB_multi_bind DONE (all drivers) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/15] i965/vec4: add component packing for gs
Reviewed-by: Edward O'Callaghan --- src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp index 9ebfb27..16d2410 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp @@ -72,6 +72,8 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] + instr->const_index[0] + offset->u32[0], type); + src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); + /* gl_PointSize is passed in the .w component of the VUE header */ if (instr->const_index[0] == VARYING_SLOT_PSIZ) src.swizzle = BRW_SWIZZLE_WWWW; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/15] i965/vec4: add support for packing tes inputs
--- src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp index 6639c86..8266a9d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp @@ -177,7 +177,9 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_load_input: case nir_intrinsic_load_per_vertex_input: { src_reg indirect_offset = get_indirect_offset(instr); + dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); unsigned imm_offset = instr->const_index[0]; + unsigned fist_component = nir_intrinsic_component(instr); src_reg header = input_read_header; if (indirect_offset.file != BAD_FILE) { @@ -190,8 +192,10 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ const unsigned max_push_slots = 24; if (imm_offset < max_push_slots) { -emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), - src_reg(ATTR, imm_offset, glsl_type::ivec4_type))); +src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type); +src.swizzle = BRW_SWZ_COMP_INPUT(fist_component); + +emit(MOV(dst, src)); prog_data->urb_read_length = MAX2(prog_data->urb_read_length, DIV_ROUND_UP(imm_offset + 1, 2)); @@ -205,12 +209,14 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) read->offset = imm_offset; read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; + src_reg src = src_reg(temp); + src.swizzle = BRW_SWZ_COMP_INPUT(fist_component); + /* Copy to target. We might end up with some funky writemasks landing * in here, but we really don't want them in the above pseudo-ops. 
*/ - dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); dst.writemask = brw_writemask_for_size(instr->num_components); - emit(MOV(dst, src_reg(temp))); + emit(MOV(dst, src)); break; } default: -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/15] i965/vec4: support packing tcs inputs
Reviewed-by: Edward O'Callaghan --- src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 8 ++-- src/mesa/drivers/dri/i965/brw_vec4_tcs.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index f61c612..8bd150a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -166,6 +166,7 @@ void vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst, const src_reg &vertex_index, unsigned base_offset, + unsigned first_component, const src_reg &indirect_offset) { vec4_instruction *inst; @@ -191,7 +192,9 @@ vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst, if (inst->offset == 0 && indirect_offset.file == BAD_FILE) { emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_))); } else { - emit(MOV(dst, src_reg(temp))); + src_reg src = src_reg(temp); + src.swizzle = BRW_SWZ_COMP_INPUT(first_component); + emit(MOV(dst, src)); } } @@ -267,7 +270,8 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); dst.writemask = brw_writemask_for_size(instr->num_components); - emit_input_urb_read(dst, vertex_index, imm_offset, indirect_offset); + emit_input_urb_read(dst, vertex_index, imm_offset, + nir_intrinsic_component(instr), indirect_offset); break; } case nir_intrinsic_load_input: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h index 329cd7d..d408e56 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h @@ -60,6 +60,7 @@ protected: void emit_input_urb_read(const dst_reg &dst, const src_reg &vertex_index, unsigned base_offset, +unsigned first_component, const src_reg &indirect_offset); void emit_output_urb_read(const dst_reg &dst, unsigned base_offset, -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 13/15] i965/vec4: add packing support for tcs load outputs
Reviewed-by: Edward O'Callaghan --- src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 17 + src/mesa/drivers/dri/i965/brw_vec4_tcs.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 4bc3be7..30c81c5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -201,6 +201,7 @@ vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst, void vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst, unsigned base_offset, + unsigned first_component, const src_reg &indirect_offset) { vec4_instruction *inst; @@ -216,6 +217,12 @@ vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst, read->offset = base_offset; read->mlen = 1; read->base_mrf = -1; + + if (first_component) { + src_reg src = src_reg(dst); + src.swizzle = BRW_SWZ_COMP_INPUT(first_component); + emit(MOV(dst, src)); + } } void @@ -295,14 +302,15 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case GL_QUADS: { /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */ dst_reg tmp(this, glsl_type::vec4_type); -emit_output_urb_read(tmp, 0, src_reg()); +emit_output_urb_read(tmp, 0, 0, src_reg()); emit(MOV(writemask(dst, WRITEMASK_XY), swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX))); break; } case GL_TRIANGLES: /* DWord 4; use offset 1 but normal swizzle/writemask. */ -emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg()); +emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0, + src_reg()); break; case GL_ISOLINES: /* All channels are undefined. 
*/ @@ -334,10 +342,11 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } dst_reg tmp(this, glsl_type::vec4_type); - emit_output_urb_read(tmp, 1, src_reg()); + emit_output_urb_read(tmp, 1, 0, src_reg()); emit(MOV(dst, swizzle(src_reg(tmp), swiz))); } else { - emit_output_urb_read(dst, imm_offset, indirect_offset); + emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr), + indirect_offset); } break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h index d408e56..030eb5e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h @@ -64,6 +64,7 @@ protected: const src_reg &indirect_offset); void emit_output_urb_read(const dst_reg &dst, unsigned base_offset, + unsigned first_component, const src_reg &indirect_offset); void emit_urb_write(const src_reg &value, unsigned writemask, -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/15] i965/vec4: add support for packing inputs
Reviewed-by: Edward O'Callaghan --- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index f3b4528..33ad852 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -397,6 +397,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src = src_reg(ATTR, instr->const_index[0] + const_offset->u32[0], glsl_type::uvec4_type); + /* Swizzle source based on component layout qualifier */ + src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); dest = get_nir_dest(instr->dest, src.type); dest.writemask = brw_writemask_for_size(instr->num_components); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/15] i965: bring back type_size_vec4_times_4()
We will use this for output varyings. To make component packing simpler we will just treat all varyings as vec4s. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 13 + src/mesa/drivers/dri/i965/brw_shader.h | 1 + 2 files changed, 14 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 120d6dd..547a0c2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -506,6 +506,19 @@ type_size_scalar(const struct glsl_type *type) return 0; } +/** + * Returns the number of scalar components needed to store type, assuming + * that vectors are padded out to vec4. + * + * This has the packing rules of type_size_vec4(), but counts components + * similar to type_size_scalar(). + */ +extern "C" int +type_size_vec4_times_4(const struct glsl_type *type) +{ + return 4 * type_size_vec4(type); +} + /* Attribute arrays are loaded as one vec4 per element (or matrix column), * except for double-precision types, which are loaded as one dvec4. */ diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index dd9eb2d..e61c080 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -294,6 +294,7 @@ struct gl_linked_shader *brw_new_shader(gl_shader_stage stage); int type_size_scalar(const struct glsl_type *type); int type_size_vec4(const struct glsl_type *type); int type_size_dvec4(const struct glsl_type *type); +int type_size_vec4_times_4(const struct glsl_type *type); int type_size_vs_input(const struct glsl_type *type); unsigned tesslevel_outer_components(GLenum tes_primitive_mode); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/15] i965: enable component packing for vs and fs
Rather than trying to work out the total number of components used at a location we simply treat all outputs as vec4s. --- src/mesa/drivers/dri/i965/brw_fs.h | 1 - src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 22 ++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++ src/mesa/drivers/dri/i965/brw_nir.c | 8 4 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 574475f..fc1e1c4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -317,7 +317,6 @@ public: fs_reg frag_stencil; fs_reg sample_mask; fs_reg outputs[VARYING_SLOT_MAX]; - unsigned output_components[VARYING_SLOT_MAX]; fs_reg dual_src_output; bool do_dual_src; int first_non_payload_grf; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 610c151..395594f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -67,13 +67,12 @@ fs_visitor::nir_setup_single_output_varying(fs_reg *reg, } } else { assert(type->is_scalar() || type->is_vector()); - unsigned num_elements = type->vector_elements; + unsigned num_iter = 1; if (type->is_double()) - num_elements *= 2; - for (unsigned count = 0; count < num_elements; count += 4) { + num_iter = 2; + for (unsigned count = 0; count < num_iter; count++) { this->outputs[*location] = *reg; - this->output_components[*location] = MIN2(4, num_elements - count); - *reg = offset(*reg, bld, this->output_components[*location]); + *reg = offset(*reg, bld, 4); (*location)++; } } @@ -114,7 +113,6 @@ fs_visitor::nir_setup_outputs() /* Writing gl_FragColor outputs to all color regions. 
*/ for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) { this->outputs[i] = reg; - this->output_components[i] = 4; } } else if (var->data.location == FRAG_RESULT_DEPTH) { this->frag_depth = reg; @@ -123,8 +121,6 @@ fs_visitor::nir_setup_outputs() } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) { this->sample_mask = reg; } else { -int vector_elements = var->type->without_array()->vector_elements; - /* gl_FragData or a user-defined FS output */ assert(var->data.location >= FRAG_RESULT_DATA0 && var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS); @@ -132,8 +128,7 @@ fs_visitor::nir_setup_outputs() /* General color output. */ for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) { int output = var->data.location - FRAG_RESULT_DATA0 + i; - this->outputs[output] = offset(reg, bld, vector_elements * i); - this->output_components[output] = vector_elements; + this->outputs[output] = offset(reg, bld, 4 * i); } } break; @@ -3892,6 +3887,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_load_input: { fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type); + unsigned first_component = nir_intrinsic_component(instr); unsigned num_components = instr->num_components; enum brw_reg_type type = dest.type; @@ -3900,7 +3896,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr src = offset(src, bld, const_offset->u32[0]); for (unsigned j = 0; j < num_components; j++) { - bld.MOV(offset(dest, bld, j), offset(src, bld, j)); + bld.MOV(offset(dest, bld, j), offset(src, bld, j + first_component)); } if (type == BRW_REGISTER_TYPE_DF) { @@ -4026,6 +4022,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr new_dest = offset(new_dest, bld, const_offset->u32[0]); unsigned num_components = instr->num_components; + unsigned first_component = nir_intrinsic_component(instr); unsigned bit_size = instr->src[0].is_ssa ? 
instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size; if (bit_size == 64) { @@ -4039,7 +4036,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } for (unsigned j = 0; j < num_components; j++) { - bld.MOV(offset(new_dest, bld, j), offset(src, bld, j)); + bld.MOV(offset(new_dest, bld, j + first_component), + offset(src, bld, j)); } break; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 156a630..6d84374 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i96
[Mesa-dev] V5 ARB_enhanced_layouts packing support for i965 Gen6+
V5: - rebase on Ken's interpolation clean-ups [1] V4: - add vec4 backend support and enable for Gen6+ V3: - Rewrite patch 9 (add support for packing arrays) to not add hacks to the type_size() functions. - Add packing support for the load_output intrinsics (patch 12) - Add glsl_dvec_type() helper (patch 8) V2: - validation fixes patches 1-2 - added support for packing doubles now that explicit location fixes have landed. - fix various issues with intel debug output with new COMPONENT const index. This adds component packing support for Gen6+. Series can be found in my component_packing_gen6+_v2 branch: https://github.com/tarceri/Mesa_arrays_of_arrays.git [1] https://patchwork.freedesktop.org/series/1/ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/15] i965: add helper for creating packing writemask
For example, where n=3 and first_component=1, this will give us 0xE (WRITEMASK_YZW). Reviewed-by: Edward O'Callaghan --- src/mesa/drivers/dri/i965/brw_reg.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 7eab7b5..abd63e4 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -972,6 +972,12 @@ brw_writemask_for_size(unsigned n) return (1 << n) - 1; } +static inline unsigned +brw_writemask_for_component_packing(unsigned n, unsigned first_component) +{ + return (((1 << n) - 1) << first_component); +} + static inline struct brw_reg negate(struct brw_reg reg) { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Use tex_mocs instead of rb_mocs for GL images.
On Monday, July 18, 2016 10:58:31 PM PDT Ben Widawsky wrote: > On Mon, Jul 18, 2016 at 07:08:46PM -0700, Kenneth Graunke wrote: > > Fixes a 10-20% performance regression in OglCSDof caused by commit > > 5a8c89038abab0184ea72664ab390ec6ca58b4d6, which made images (in the > > image load/store sense) use BDW_MOCS_PTE instead of BDW_MOCS_WB. > > > > This seems sketchy, as the default PTE value is supposed to be > > WB LLC eLLC, which is the same as our MOCS WB setting. It's only > > supposed to change when using a surface for display, which won't > > ever happen for images. Something may be wrong in the kernel... > > Which platform was the regression on? Broadwell has some weirdness if you > disable PPGTT which might cause PTE MOCS to fall back to UC. I think at least > aliasing PPGTT has been enabled since pretty early for Broadwell. If there is > no > ppgtt overrides here, then I'm not sure what would be going on - it sound > sketchy to me too. My Broadwell GT2 laptop running drm-intel-nightly from 2016y-05m-27d-12h-32m-45s (4.6.0 based), and Mark's Broadwell GT3e boxes in Jenkins. I tried to verify the kernel PTE settings, but I got totally lost by the _PAGE_PAT stuff. Earlier platforms I can follow easily... This patch fixes the observed problem, but I still would like to understand why the PTE isn't good enough...for renderbuffers, we use the "Use PTE" setting...and could be missing out on some performance there... signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Use tex_mocs instead of rb_mocs for GL images.
On Mon, Jul 18, 2016 at 07:08:46PM -0700, Kenneth Graunke wrote: > Fixes a 10-20% performance regression in OglCSDof caused by commit > 5a8c89038abab0184ea72664ab390ec6ca58b4d6, which made images (in the > image load/store sense) use BDW_MOCS_PTE instead of BDW_MOCS_WB. > > This seems sketchy, as the default PTE value is supposed to be > WB LLC eLLC, which is the same as our MOCS WB setting. It's only > supposed to change when using a surface for display, which won't > ever happen for images. Something may be wrong in the kernel... Which platform was the regression on? Broadwell has some weirdness if you disable PPGTT which might cause PTE MOCS to fall back to UC. I think at least aliasing PPGTT has been enabled since pretty early for Broadwell. If there are no PPGTT overrides here, then I'm not sure what would be going on - it sounds sketchy to me too. > > Signed-off-by: Kenneth Graunke > Reviewed-by: Jason Ekstrand I haven't really followed any of the isl stuff, but it lgtm. > --- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index d896789..87f8601 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -1516,7 +1516,7 @@ update_image_surface(struct brw_context *brw, > const int surf_index = surf_offset - > &brw->wm.base.surf_offset[0]; > > brw_emit_surface_state(brw, mt, &view, > - surface_state_infos[brw->gen].rb_mocs, > false, > + surface_state_infos[brw->gen].tex_mocs, > false, > surf_offset, surf_index, > I915_GEM_DOMAIN_SAMPLER, > access == GL_READ_ONLY ? 0 : > -- > 2.9.0 > -- Ben Widawsky, Intel Open Source Technology Center ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3a/3] i965: Correctly set RENDER_SURFACE_STATE::Depth for cube map textures
On Mon, Jul 18, 2016 at 10:16:53PM -0700, Jason Ekstrand wrote: > From the Sky Lake PRM: > >"For SURFTYPE_CUBE: For Sampling Engine Surfaces and Typed Data Port >Surfaces, the range of this field is [0,340], indicating the number of >cube array elements (equal to the number of underlying 2D array elements >divided by 6). For other surfaces, this field must be zero." > > In other words, the depth field for cube maps is in number of cubes not > number of 2-D slices so we need to divide by 6. It appears as if we've > been doing this wrong ever since we first added cube map arrays for Sandy > Bridge. Also, we now need to remoe the shader hacks we've always done remove > since they were only needed because we were setting the depth field six > times too large. > > Signed-off-by: Jason Ekstrand > Cc: "12.0 11.2 11.1" > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 21 + > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 6 +- > src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 3 ++- > src/mesa/drivers/dri/i965/gen8_surface_state.c| 3 ++- > 4 files changed, 14 insertions(+), 19 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 129984a..eeec0e2 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -4423,26 +4423,15 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, > nir_tex_instr *instr) > for (unsigned i = 0; i < dest_size; i++) >nir_dest[i] = offset(dst, bld, i); > > - bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && > -instr->is_array; > - > if (instr->op == nir_texop_query_levels) { >/* # levels is in .w */ >nir_dest[0] = offset(dst, bld, 3); > - } else if (instr->op == nir_texop_txs && dest_size >= 3 && > - (devinfo->gen < 7 || is_cube_array)) { > + } else if (instr->op == nir_texop_txs && > + dest_size >= 3 && devinfo->gen < 7) { > + /* Gen4-6 return 0 instead of 1 for single layer surfaces. 
*/ >fs_reg depth = offset(dst, bld, 2); > - fs_reg fixed_depth = vgrf(glsl_type::int_type); > - > - if (is_cube_array) { > - /* fixup #layers for cube map arrays */ > - bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, > brw_imm_d(6)); > - } else if (devinfo->gen < 7) { > - /* Gen4-6 return 0 instead of 1 for single layer surfaces. */ > - bld.emit_minmax(fixed_depth, depth, brw_imm_d(1), > BRW_CONDITIONAL_GE); > - } > - > - nir_dest[2] = fixed_depth; > + nir_dest[2] = vgrf(glsl_type::int_type); > + bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE); > } > > bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0); > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index c101e05..a96eae5 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -33,6 +33,7 @@ > #include "main/context.h" > #include "main/blend.h" > #include "main/mtypes.h" > +#include "main/teximage.h" > #include "main/samplerobj.h" > #include "main/shaderimage.h" > #include "program/prog_parameter.h" > @@ -360,8 +361,11 @@ brw_update_texture_surface(struct gl_context *ctx, > (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT | > (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT); > > + const unsigned depth = mt->logical_depth0 / > + (_mesa_is_cube_map_texture(tObj->Target) ? 
6 : 1); > + > surf[3] = (brw_get_surface_tiling_bits(mt->tiling) | > - (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT | > + (depth - 1) << BRW_SURFACE_DEPTH_SHIFT | > (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT); > > const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - > mt->first_level; > diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c > b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c > index 932e62e..f4a88f3 100644 > --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c > @@ -276,7 +276,8 @@ gen7_emit_texture_surface_state(struct brw_context *brw, > int surf_index /* unused */, > bool rw, bool for_gather) > { > - const unsigned depth = max_layer - min_layer; > + const unsigned depth = (max_layer - min_layer) / > + (_mesa_is_cube_map_texture(target) ? 6 : 1); > uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, > 8 * 4, 32, surf_offset); > > diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c > b/src/mesa/drivers/dri/i965/gen8_surface_state.c > index bd9e2a1..89e
Re: [Mesa-dev] [PATCH 7/7] i965: Delete the FS_OPCODE_INTERPOLATE_AT_CENTROID virtual opcode.
On Jul 18, 2016 10:11 PM, "Chris Forbes" wrote: > > I remember arguing about this when it got added -- tradeoff was payload size/register pressure vs needing to call out to this unit, if centroid barycentric coords weren't required for anything else? It does seem fairly pointless, though. > > For the series:- > > Reviewed-by: Chris Forbes I'd like to chip in before you get too excited and push. I'll take a proper look tomorrow. > On Tue, Jul 19, 2016 at 8:26 AM, Kenneth Graunke wrote: >> >> We no longer use this message. As far as I can tell, it's fairly >> useless - the equivalent information is provided in the payload. >> >> Signed-off-by: Kenneth Graunke >> --- >> src/mesa/drivers/dri/i965/brw_defines.h| 1 - >> src/mesa/drivers/dri/i965/brw_fs.cpp | 2 -- >> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 - >> src/mesa/drivers/dri/i965/brw_shader.cpp | 2 -- >> 4 files changed, 10 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h >> index b5a259e..2814fa7 100644 >> --- a/src/mesa/drivers/dri/i965/brw_defines.h >> +++ b/src/mesa/drivers/dri/i965/brw_defines.h >> @@ -1120,7 +1120,6 @@ enum opcode { >> FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, >> FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, >> FS_OPCODE_PLACEHOLDER_HALT, >> - FS_OPCODE_INTERPOLATE_AT_CENTROID, >> FS_OPCODE_INTERPOLATE_AT_SAMPLE, >> FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, >> FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp >> index 06007fe..120d6dd 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >> @@ -250,7 +250,6 @@ fs_inst::is_send_from_grf() const >> switch (opcode) { >> case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: >> case SHADER_OPCODE_SHADER_TIME_ADD: >> - case FS_OPCODE_INTERPOLATE_AT_CENTROID: >> case FS_OPCODE_INTERPOLATE_AT_SAMPLE: >> case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: >> case 
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: >> @@ -4785,7 +4784,6 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, >> case FS_OPCODE_PACK_HALF_2x16_SPLIT: >> case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: >> case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: >> - case FS_OPCODE_INTERPOLATE_AT_CENTROID: >> case FS_OPCODE_INTERPOLATE_AT_SAMPLE: >> case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: >> case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> index 1e9c7da..a390184 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> @@ -2054,11 +2054,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) >> } >> break; >> >> - case FS_OPCODE_INTERPOLATE_AT_CENTROID: >> - generate_pixel_interpolator_query(inst, dst, src[0], src[1], >> - GEN7_PIXEL_INTERPOLATOR_LOC_CENTROID); >> - break; >> - >>case FS_OPCODE_INTERPOLATE_AT_SAMPLE: >> generate_pixel_interpolator_query(inst, dst, src[0], src[1], >> GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE); >> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp >> index f3b5487..559e44c 100644 >> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp >> @@ -367,8 +367,6 @@ brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op) >> case FS_OPCODE_PLACEHOLDER_HALT: >>return "placeholder_halt"; >> >> - case FS_OPCODE_INTERPOLATE_AT_CENTROID: >> - return "interp_centroid"; >> case FS_OPCODE_INTERPOLATE_AT_SAMPLE: >>return "interp_sample"; >> case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: >> -- >> 2.9.0 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev 
mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/3] i965: Correctly set RENDER_SURFACE_STATE::Depth for
Oh dear, hacks on both sides. Sorry for this nonsense. Series is:- Reviewed-by: Chris Forbes On Tue, Jul 19, 2016 at 5:16 PM, Jason Ekstrand wrote: > From the Sky Lake PRM: > >"For SURFTYPE_CUBE: For Sampling Engine Surfaces and Typed Data Port >Surfaces, the range of this field is [0,340], indicating the number of >cube array elements (equal to the number of underlying 2D array elements >divided by 6). For other surfaces, this field must be zero." > > In other words, the depth field for cube maps is in number of cubes not > number of 2-D slices so we need to divide by 6. It appears as if we've > been doing this wrong ever since we first added cube map arrays for Sandy > Bridge. We've also had a shader hack to divide the size Z dimension of > cube maps by 6 in the textureSize call. This is completely bogus and the > only reason for it is that we've been setting the depth six times too > large. > > This little series fixes this. In order to keep things back-portable, > patch 3 comes in two versions. Version (a) is based on pre-ISL and should > be backportable to 12.0 or maybe even 11.2 or 11.1. Version (b) on the > other hand is based on top of the ISL work and can be applied on master. 
> > Cc: Emil Velikov > > Jason Ekstrand (3): > i965: Use intel_get_image_dims in alloc_texture_storage > i965/miptree: Set logical_depth0 == 6 for cube maps > i965: Correctly set RENDER_SURFACE_STATE::Depth for cube map textures > > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 21 > + > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 6 +- > src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 3 ++- > src/mesa/drivers/dri/i965/gen8_surface_state.c| 3 ++- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 ++ > src/mesa/drivers/dri/i965/intel_tex.c | 2 ++ > 6 files changed, 26 insertions(+), 23 deletions(-) > > -- > 2.5.0.400.gff86faf > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] i965/miptree: Set logical_depth0 == 6 for cube maps
This matches what we do for cube maps where logical_depth0 is in number of face-layers rather than number of cubes. This does mean that we will temporarily be setting the surface bounds too loose for cube map textures but we are already setting them too loose for cube arrays and we will be fixing that in the next commit anyway. Signed-off-by: Jason Ekstrand Cc: "12.0 11.2 11.1" --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index b6265dc..fd20f3f 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -518,10 +518,8 @@ intel_miptree_create_layout(struct brw_context *brw, } } - if (target == GL_TEXTURE_CUBE_MAP) { - assert(depth0 == 1); - depth0 = 6; - } + if (target == GL_TEXTURE_CUBE_MAP) + assert(depth0 == 6); mt->physical_width0 = width0; mt->physical_height0 = height0; @@ -1054,6 +1052,14 @@ intel_get_image_dims(struct gl_texture_image *image, *height = 1; *depth = image->Height; break; + case GL_TEXTURE_CUBE_MAP: + /* For Cube maps, the mesa/main api layer gives us a depth of 1 even + * though we really have 6 slices. + */ + *width = image->Width; + *height = image->Height; + *depth = 6; + break; default: *width = image->Width; *height = image->Height; -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3a/3] i965: Correctly set RENDER_SURFACE_STATE::Depth for cube map textures
From the Sky Lake PRM: "For SURFTYPE_CUBE: For Sampling Engine Surfaces and Typed Data Port Surfaces, the range of this field is [0,340], indicating the number of cube array elements (equal to the number of underlying 2D array elements divided by 6). For other surfaces, this field must be zero." In other words, the depth field for cube maps is in number of cubes not number of 2-D slices so we need to divide by 6. It appears as if we've been doing this wrong ever since we first added cube map arrays for Sandy Bridge. Also, we now need to remove the shader hacks we've always done since they were only needed because we were setting the depth field six times too large. Signed-off-by: Jason Ekstrand Cc: "12.0 11.2 11.1" --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 21 + src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 6 +- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 3 ++- src/mesa/drivers/dri/i965/gen8_surface_state.c| 3 ++- 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 129984a..eeec0e2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -4423,26 +4423,15 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) for (unsigned i = 0; i < dest_size; i++) nir_dest[i] = offset(dst, bld, i); - bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && -instr->is_array; - if (instr->op == nir_texop_query_levels) { /* # levels is in .w */ nir_dest[0] = offset(dst, bld, 3); - } else if (instr->op == nir_texop_txs && dest_size >= 3 && - (devinfo->gen < 7 || is_cube_array)) { + } else if (instr->op == nir_texop_txs && + dest_size >= 3 && devinfo->gen < 7) { + /* Gen4-6 return 0 instead of 1 for single layer surfaces. 
*/ fs_reg depth = offset(dst, bld, 2); - fs_reg fixed_depth = vgrf(glsl_type::int_type); - - if (is_cube_array) { - /* fixup #layers for cube map arrays */ - bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, brw_imm_d(6)); - } else if (devinfo->gen < 7) { - /* Gen4-6 return 0 instead of 1 for single layer surfaces. */ - bld.emit_minmax(fixed_depth, depth, brw_imm_d(1), BRW_CONDITIONAL_GE); - } - - nir_dest[2] = fixed_depth; + nir_dest[2] = vgrf(glsl_type::int_type); + bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE); } bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index c101e05..a96eae5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -33,6 +33,7 @@ #include "main/context.h" #include "main/blend.h" #include "main/mtypes.h" +#include "main/teximage.h" #include "main/samplerobj.h" #include "main/shaderimage.h" #include "program/prog_parameter.h" @@ -360,8 +361,11 @@ brw_update_texture_surface(struct gl_context *ctx, (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT | (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT); + const unsigned depth = mt->logical_depth0 / + (_mesa_is_cube_map_texture(tObj->Target) ? 
6 : 1); + surf[3] = (brw_get_surface_tiling_bits(mt->tiling) | - (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT | + (depth - 1) << BRW_SURFACE_DEPTH_SHIFT | (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT); const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 932e62e..f4a88f3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -276,7 +276,8 @@ gen7_emit_texture_surface_state(struct brw_context *brw, int surf_index /* unused */, bool rw, bool for_gather) { - const unsigned depth = max_layer - min_layer; + const unsigned depth = (max_layer - min_layer) / + (_mesa_is_cube_map_texture(target) ? 6 : 1); uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, surf_offset); diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index bd9e2a1..89ea8cc 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -235,7 +235,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw, uint32_t *surf_offset, int surf_index, bool rw, bool
[Mesa-dev] [PATCH 3b/3] i965: Stop munging cube array lengths by 6
From the Sky Lake PRM: "For SURFTYPE_CUBE: For Sampling Engine Surfaces and Typed Data Port Surfaces, the range of this field is [0,340], indicating the number of cube array elements (equal to the number of underlying 2D array elements divided by 6). For other surfaces, this field must be zero." In other words, the depth field for cube maps is in number of cubes not number of 2-D slices so we need to divide by 6. ISL will do this correctly for us assuming that we provide it with the correct array bounds which it expects to be in 2-D slices. It appears as if we've been doing this wrong ever since we first added cube map arrays for Sandy Bridge and the change to ISL made things slightly worse. While we're at it, we now need to remove the shader hacks we've always done since they were only needed because we were setting the depth field six times too large. Signed-off-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 21 + src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 4 +--- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 6265dc6..14a8adb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -4457,26 +4457,15 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) for (unsigned i = 0; i < dest_size; i++) nir_dest[i] = offset(dst, bld, i); - bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && -instr->is_array; - if (instr->op == nir_texop_query_levels) { /* # levels is in .w */ nir_dest[0] = offset(dst, bld, 3); - } else if (instr->op == nir_texop_txs && dest_size >= 3 && - (devinfo->gen < 7 || is_cube_array)) { + } else if (instr->op == nir_texop_txs && + dest_size >= 3 && devinfo->gen < 7) { + /* Gen4-6 return 0 instead of 1 for single layer surfaces. 
*/ fs_reg depth = offset(dst, bld, 2); - fs_reg fixed_depth = vgrf(glsl_type::int_type); - - if (is_cube_array) { - /* fixup #layers for cube map arrays */ - bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, brw_imm_d(6)); - } else if (devinfo->gen < 7) { - /* Gen4-6 return 0 instead of 1 for single layer surfaces. */ - bld.emit_minmax(fixed_depth, depth, brw_imm_d(1), BRW_CONDITIONAL_GE); - } - - nir_dest[2] = fixed_depth; + nir_dest[2] = vgrf(glsl_type::int_type); + bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE); } bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index d896789..e5a3acf 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -396,11 +396,9 @@ brw_update_texture_surface(struct gl_context *ctx, /* If this is a view with restricted NumLayers, then our effective depth * is not just the miptree depth. */ - const unsigned mt_num_layers = - mt->logical_depth0 * (_mesa_is_cube_map_texture(mt->target) ? 6 : 1); const unsigned view_num_layers = (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers : -mt_num_layers; +mt->logical_depth0; /* Handling GL_ALPHA as a surface format override breaks 1.30+ style * texturing functions that return a float, as our code generation always -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] i965: Use intel_get_image_dims in alloc_texture_storage
The intel_get_image_dims helper function handles some image dimension sanitization for us for things such as 1-D array textures. We should probably be using it here. Signed-off-by: Jason Ekstrand Cc: "12.0 11.2 11.1" --- src/mesa/drivers/dri/i965/intel_tex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index 8c32fe3..d3e24f4 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -141,6 +141,8 @@ intel_alloc_texture_storage(struct gl_context *ctx, !intel_miptree_match_image(intel_texobj->mt, first_image) || intel_texobj->mt->last_level != levels - 1) { intel_miptree_release(&intel_texobj->mt); + + intel_get_image_dims(first_image, &width, &height, &depth); intel_texobj->mt = intel_miptree_create(brw, texobj->Target, first_image->TexFormat, 0, levels - 1, -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/3] i965: Correctly set RENDER_SURFACE_STATE::Depth for
From the Sky Lake PRM: "For SURFTYPE_CUBE: For Sampling Engine Surfaces and Typed Data Port Surfaces, the range of this field is [0,340], indicating the number of cube array elements (equal to the number of underlying 2D array elements divided by 6). For other surfaces, this field must be zero." In other words, the depth field for cube maps is in number of cubes not number of 2-D slices so we need to divide by 6. It appears as if we've been doing this wrong ever since we first added cube map arrays for Sandy Bridge. We've also had a shader hack to divide the size Z dimension of cube maps by 6 in the textureSize call. This is completely bogus and the only reason for it is that we've been setting the depth six times too large. This little series fixes this. In order to keep things back-portable, patch 3 comes in two versions. Version (a) is based on pre-ISL and should be backportable to 12.0 or maybe even 11.2 or 11.1. Version (b) on the other hand is based on top of the ISL work and can be applied on master. Cc: Emil Velikov Jason Ekstrand (3): i965: Use intel_get_image_dims in alloc_texture_storage i965/miptree: Set logical_depth0 == 6 for cube maps i965: Correctly set RENDER_SURFACE_STATE::Depth for cube map textures src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 21 + src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 6 +- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 3 ++- src/mesa/drivers/dri/i965/gen8_surface_state.c| 3 ++- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 ++ src/mesa/drivers/dri/i965/intel_tex.c | 2 ++ 6 files changed, 26 insertions(+), 23 deletions(-) -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96950] Another regression from bc4e0c486: vbo: Use a bitmask to track the active arrays in vbo_exec*.
https://bugs.freedesktop.org/show_bug.cgi?id=96950 --- Comment #5 from Mathias Fröhlich --- Or can you provide an apitrace? I have no such system at hand to reproduce this. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/7] i965: Delete the FS_OPCODE_INTERPOLATE_AT_CENTROID virtual opcode.
I remember arguing about this when it got added -- tradeoff was payload size/register pressure vs needing to call out to this unit, if centroid barycentric coords weren't required for anything else? It does seem fairly pointless, though. For the series:- Reviewed-by: Chris Forbes On Tue, Jul 19, 2016 at 8:26 AM, Kenneth Graunke wrote: > We no longer use this message. As far as I can tell, it's fairly > useless - the equivalent information is provided in the payload. > > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_defines.h| 1 - > src/mesa/drivers/dri/i965/brw_fs.cpp | 2 -- > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 - > src/mesa/drivers/dri/i965/brw_shader.cpp | 2 -- > 4 files changed, 10 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index b5a259e..2814fa7 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1120,7 +1120,6 @@ enum opcode { > FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, > FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, > FS_OPCODE_PLACEHOLDER_HALT, > - FS_OPCODE_INTERPOLATE_AT_CENTROID, > FS_OPCODE_INTERPOLATE_AT_SAMPLE, > FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, > FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 06007fe..120d6dd 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -250,7 +250,6 @@ fs_inst::is_send_from_grf() const > switch (opcode) { > case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: > case SHADER_OPCODE_SHADER_TIME_ADD: > - case FS_OPCODE_INTERPOLATE_AT_CENTROID: > case FS_OPCODE_INTERPOLATE_AT_SAMPLE: > case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: > case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: > @@ -4785,7 +4784,6 @@ get_lowered_simd_width(const struct brw_device_info > *devinfo, > case FS_OPCODE_PACK_HALF_2x16_SPLIT: > case 
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: > case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: > - case FS_OPCODE_INTERPOLATE_AT_CENTROID: > case FS_OPCODE_INTERPOLATE_AT_SAMPLE: > case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: > case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 1e9c7da..a390184 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -2054,11 +2054,6 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > } > break; > > - case FS_OPCODE_INTERPOLATE_AT_CENTROID: > - generate_pixel_interpolator_query(inst, dst, src[0], src[1], > - > GEN7_PIXEL_INTERPOLATOR_LOC_CENTROID); > - break; > - >case FS_OPCODE_INTERPOLATE_AT_SAMPLE: > generate_pixel_interpolator_query(inst, dst, src[0], src[1], > > GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE); > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index f3b5487..559e44c 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -367,8 +367,6 @@ brw_instruction_name(const struct brw_device_info > *devinfo, enum opcode op) > case FS_OPCODE_PLACEHOLDER_HALT: >return "placeholder_halt"; > > - case FS_OPCODE_INTERPOLATE_AT_CENTROID: > - return "interp_centroid"; > case FS_OPCODE_INTERPOLATE_AT_SAMPLE: >return "interp_sample"; > case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: > -- > 2.9.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] nir: Add a nir_lower_io flag for using load_interpolated_input intrins.
Seems a little unfortunate to add a random bool to this interface which is otherwise fairly descriptive, but OK. On Tue, Jul 19, 2016 at 8:26 AM, Kenneth Graunke wrote: > While my intention is that the new intrinsics should be usable by all > drivers, we need to make them optional until all drivers switch. > > This doesn't do anything yet, but I added it as a separate patch to > keep the interface churn separate for easier review. > > Signed-off-by: Kenneth Graunke > --- > src/compiler/nir/nir.h | 3 ++- > src/compiler/nir/nir_lower_io.c | 15 +++ > src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 2 +- > src/mesa/drivers/dri/i965/brw_blorp.c | 2 +- > src/mesa/drivers/dri/i965/brw_nir.c | 18 +- > src/mesa/drivers/dri/i965/brw_program.c | 4 ++-- > src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +- > 7 files changed, 27 insertions(+), 19 deletions(-) > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index ac11998..e996e0e 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -2324,7 +2324,8 @@ void nir_assign_var_locations(struct exec_list > *var_list, unsigned *size, > > void nir_lower_io(nir_shader *shader, >nir_variable_mode modes, > - int (*type_size)(const struct glsl_type *)); > + int (*type_size)(const struct glsl_type *), > + bool use_load_interpolated_input_intrinsics); > nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); > nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); > > diff --git a/src/compiler/nir/nir_lower_io.c > b/src/compiler/nir/nir_lower_io.c > index b05a73f..aa8a517 100644 > --- a/src/compiler/nir/nir_lower_io.c > +++ b/src/compiler/nir/nir_lower_io.c > @@ -39,6 +39,7 @@ struct lower_io_state { > void *mem_ctx; > int (*type_size)(const struct glsl_type *type); > nir_variable_mode modes; > + bool use_interpolated_input; > }; > > void > @@ -394,7 +395,8 @@ nir_lower_io_block(nir_block *block, > static void > nir_lower_io_impl(nir_function_impl *impl, >nir_variable_mode modes, > - int 
(*type_size)(const struct glsl_type *)) > + int (*type_size)(const struct glsl_type *), > + bool use_interpolated_input) > { > struct lower_io_state state; > > @@ -402,6 +404,7 @@ nir_lower_io_impl(nir_function_impl *impl, > state.mem_ctx = ralloc_parent(impl); > state.modes = modes; > state.type_size = type_size; > + state.use_interpolated_input = use_interpolated_input; > > nir_foreach_block(block, impl) { >nir_lower_io_block(block, &state); > @@ -413,11 +416,15 @@ nir_lower_io_impl(nir_function_impl *impl, > > void > nir_lower_io(nir_shader *shader, nir_variable_mode modes, > - int (*type_size)(const struct glsl_type *)) > + int (*type_size)(const struct glsl_type *), > + bool use_interpolated_input) > { > nir_foreach_function(function, shader) { > - if (function->impl) > - nir_lower_io_impl(function->impl, modes, type_size); > + if (function->impl) { > + nir_lower_io_impl(function->impl, modes, type_size, > + use_interpolated_input && > + shader->stage == MESA_SHADER_FRAGMENT); > + } > } > } > > diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c > b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c > index 41532fc..a8a8c1b 100644 > --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c > +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c > @@ -93,7 +93,7 @@ load_glsl(unsigned num_files, char* const* files, > gl_shader_stage stage) > // TODO nir_assign_var_locations?? 
> > NIR_PASS_V(nir, nir_lower_system_values); > - NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size); > + NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size, > false); > NIR_PASS_V(nir, nir_lower_samplers, prog); > > return nir; > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c > b/src/mesa/drivers/dri/i965/brw_blorp.c > index 282a5b2..0473cfe 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp.c > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c > @@ -209,7 +209,7 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, > struct nir_shader *nir, >unsigned end = var->data.location + > nir_uniform_type_size(var->type); >nir->num_uniforms = MAX2(nir->num_uniforms, end); > } > - nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size); > + nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size, false); > > const unsigned *program = >brw_compile_fs(compiler, brw, mem_ctx, wm_key, &wm_prog_data, nir, > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c > b/src/mesa/drivers/dri/i965/brw_nir.c > index 6c3e1d1..caf9fe0 100644 > --- a/src/mesa/dri
Re: [Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.
On Tue, Jul 19, 2016 at 8:26 AM, Kenneth Graunke wrote: > + default: > + assert(!"invalid intrinsic"); > unreachable() ? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/10] egl/android: Improve the Android EGL backend
On Tue, Jul 19, 2016 at 12:35 PM, Rob Herring wrote: > On Fri, Jul 15, 2016 at 2:53 AM, Tomasz Figa wrote: >> Hi, >> >> This series is a collection of various fixes and extensions we came up >> with during our attempt to use Mesa for Android. >> >> Fixes included in this series: >> - added mandatory EGL_MAX_PBUFFER_WIDTH and _HEIGHT attributes to EGL >>configs, >> - fixed multiple issues with handling pbuffers in the backend, >> - found and fixed a DRI image leak, >> - made the implementation of DRI image loader .getBuffers callback >>conform better to the extension semantics. >> >> New features added by this series: >> - possibility to build the Android EGL platform without drm_gralloc >>headers, >> - support for creating EGL images from Android native buffers with >>YV12 pixel format (prime-only), >> - fallback to kms_swrast driver when no hardware driver can be loaded >>but there is still some usable DRI node present in the system. >> - more logging in case of errors to help diagnosing problems. >> >> Testing was done using classic i965 (gen 8) and gallium softpipe drivers >> on an internal build of Android, based on gralloc backed by a DRM render >> node and sharing buffers by PRIME FDs. > > I've tested out patches 1-6 with virgl and I don't get anything > displayed. I get this message: > > EGL-DRI2: Front buffer is not supported for window surfaces > > That's as far as I investigated. I'll look into it some more tomorrow. Thanks a lot for testing! It looks like somehow your driver (or gallium) is triggering a call to DRI image loader getBuffers() callback with front buffer bit set in the image mask, but window surfaces on Android provide only back buffers. My understanding of the semantics was that the callback should deny such requests, so that's how I implemented it. However it isn't really well documented, so potentially it should only provide buffers that are available and ignore the rest without bailing out. 
Could someone more familiar with this extension comment on this? > > Patches 7-10 wouldn't apply. Do you have a git tree with the series? Hmm, I rebased them on Mesa master just before sending. Let me try to create a sandbox branch in our chromium tree. Best regards, Tomasz ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/10] egl/android: Improve the Android EGL backend
On Fri, Jul 15, 2016 at 2:53 AM, Tomasz Figa wrote: > Hi, > > This series is a collection of various fixes and extensions we came up > with during our attempt to use Mesa for Android. > > Fixes included in this series: > - added mandatory EGL_MAX_PBUFFER_WIDTH and _HEIGHT attributes to EGL >configs, > - fixed multiple issues with handling pbuffers in the backend, > - found and fixed a DRI image leak, > - made the implementation of DRI image loader .getBuffers callback >conform better to the extension semantics. > > New features added by this series: > - possibility to build the Android EGL platform without drm_gralloc >headers, > - support for creating EGL images from Android native buffers with >YV12 pixel format (prime-only), > - fallback to kms_swrast driver when no hardware driver can be loaded >but there is still some usable DRI node present in the system. > - more logging in case of errors to help diagnosing problems. > > Testing was done using classic i965 (gen 8) and gallium softpipe drivers > on an internal build of Android, based on gralloc backed by a DRM render > node and sharing buffers by PRIME FDs. I've tested out patches 1-6 with virgl and I don't get anything displayed. I get this message: EGL-DRI2: Front buffer is not supported for window surfaces That's as far as I investigated. I'll look into it some more tomorrow. Patches 7-10 wouldn't apply. Do you have a git tree with the series? Rob ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/10] egl/android: Make drm_gralloc headers optional
On Tue, Jul 19, 2016 at 2:35 AM, Emil Velikov wrote: > On 18 July 2016 at 16:38, Tomasz Figa wrote: >> On Mon, Jul 18, 2016 at 11:58 PM, Emil Velikov >> wrote: >>> On 18 July 2016 at 13:02, Tomasz Figa wrote: On Mon, Jul 18, 2016 at 7:28 PM, Emil Velikov wrote: > Hi Tomasz, > > On 15 July 2016 at 08:53, Tomasz Figa wrote: > >> +#define DRM_RENDER_DEV_NAME "%s/renderD%d" >> + >> +static int >> +droid_open_device(_EGLDisplay *dpy) >> +{ >> + struct dri2_egl_display *dri2_dpy = dpy->DriverData; >> + const int limit = 64; >> + const int base = 128; >> + int fd; >> + int i; >> + >> + for (i = 0; i < limit; ++i) { >> + char *card_path; >> + if (asprintf(&card_path, DRM_RENDER_DEV_NAME, DRM_DIR_NAME, base >> + i) < 0) > Why do we need any of this ? What gralloc implementation are you guys > using ? We are using our heavily rewritten fork of some old drm_gralloc release. It supports only render nodes and PRIME FDs and doesn't export the DRI device FD outside of its internals (which isn't actually even fully correct, at least for PRIME and render nodes, see my reply to Rob's comments). >>> That explain it, since https://chromium.googlesource.com/ does not >>> have gralloc, and >>> https://android.googlesource.com/platform/external/drm_gralloc/ has >>> both the DRM_FD define and the gem/flink function(s)? >>> >>> Can I suggest porting the fd drm_gralloc/gbm_gralloc patches to your >>> private copy/repo. This way we'll have some consistency throughout >>> gralloc implementations >> >> I'd prefer if any code using flink names was not added back. On top of >> that, our drm_gralloc doesn't really have much in common with that >> from android-x86 anymore (as I said, it was heavily rewritten) and >> there is not even a chance that with its current design flink names >> could even work. >> >> Also I'm wondering why we want to consider current brokenness of >> drm_gralloc as something to be consistent with. 
It's supposed to be a >> HAL library providing a uniform abstraction, but it exports private >> APIs on the side instead. Moreover, as I mentioned before, flink names >> are considered insecure and it would be really much better if we could >> just forget about them. >> >>> and you can use gbm_gralloc directly in the >>> (hopefully) not too distant future. >> >> I agree with this part, though. gbm_gralloc is definitely something >> that we might want to migrate to in the future. Although it's a bit >> lacking at the moment, so it might need a bit more time to develop the >> missing bits. [I'm CCing Gurchetan, who was investigating GBM-backed >> gralloc usable for our purposes.] >> >> In any case, the missing flink API is quite easy to handle and can be >> just stubbed out in a local header as you suggested. I don't think it >> would hurt anyone and would definitely help us and anyone not willing >> to export any private APIs from their gralloc and rely only on the >> public HAL API. >> > Looks like I wasn't clear enough here, really sorry about that. No > objection on nuking _any_ of the gem/flink paths, but hoping to have > the behaviour consistent with the one described in > get_native_buffer_fd. Did you mean having the PRIME FD in native_handle_t::data[0]? If so, it's more or less guaranteed by the API, because all file descriptors in handle have to be stored in first N (equal to native_handle_t::numFds) ints of native_handle_t::data[] for respective general code to properly transfer the FDs through binder when sharing between processes. Our gralloc currently supports only one PRIME FD per buffer (no separate memory planes for planar YUV) and stores it exactly in native_handle_t::data[0]. > >>> > > Afaict the latter must provide reasonable result for > hw_get_module(GRALLOC_HARDWARE_MODULE_ID...) and as it's missing the > perform hook existing code should work just fine. Right ? Existing code would fail with -1 as file descriptor, wouldn't it?
Or I'm failing to see something? >>> Nope you're spot on - I had a dull moment. May I suggest reverting the >>> patch which removed the GRALLOC_MODULE_PERFORM_GET_DRM_FD handling in >>> your gralloc ? Reason being is that the proposed code is very 'flaky' >>> and can open the wrong render node on systems which have more than >>> one. >> >> I think the answer is a bit of yes and no at the same time. >> >> Starting with no, it's incorrect for gralloc to share the DRI device >> FD with Mesa for multiple reasons: >> - there are cases when the allocator used is different than the render node, > Can you please provide an example how the current open-source > gralloc/EGL stack might hit this ? Only a mix of closed and > open-source components comes to mind :-\ Well, yes. I don't think I can hide the fact that we have to use closed source components on some platforms. To put it simply,
Re: [Mesa-dev] Required Mako version? (WAS: mesa from git fails to compile)
On Monday, July 18, 2016 10:58:25 PM PDT Pali Rohár wrote: > Any conclusion or fix for this issue? Dylan suggested you use pip --user to install a newer version of Mako. Is that a workable solution to your problem? I'm having a hard time getting excited about making upstream support building against 2011-era software, when doing so makes transitioning to Python 3 harder (which is useful for forward-looking distros). Especially when Debian stable and the last two Ubuntu LTS releases (2016 and 2014) ship a recent enough version...it's just the LTS three back (2012) that doesn't work... --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: Fix debug_printf under Haiku
July 18 2016 1:10 PM, "Emil Velikov" wrote: > On 18 July 2016 at 16:28, Alexander von Gluck IV > wrote: > >> July 18 2016 9:20 AM, "Emil Velikov" wrote: >>> On 18 July 2016 at 14:39, Alexander von Gluck IV >>> wrote: >>> July 18 2016 3:29 AM, "Nicolai Hähnle" wrote: > A comment further up in the same file says > > /* Haiku provides debug_printf in libroot with OS.h */ > > Is that no longer true? > > Nicolai > > On 16.07.2016 16:27, Alexander von Gluck IV wrote: > >> --- >> src/gallium/auxiliary/util/u_debug.h | 5 - >> 1 file changed, 4 insertions(+), 1 deletion(-) >> >> diff --git a/src/gallium/auxiliary/util/u_debug.h >> b/src/gallium/auxiliary/util/u_debug.h >> index 7da7f53..7dc4ce8 100644 >> --- a/src/gallium/auxiliary/util/u_debug.h >> +++ b/src/gallium/auxiliary/util/u_debug.h >> @@ -83,7 +83,10 @@ _debug_printf(const char *format, ...) >> * - avoid outputing large strings (512 bytes is the current maximum >> length >> * that is guaranteed to be printed in all platforms) >> */ >> -#if !defined(PIPE_OS_HAIKU) >> +#if defined(PIPE_OS_HAIKU) >> +void >> +debug_printf(const char *format, ...) _util_printf_format(1,2); >> +#else >> static inline void >> debug_printf(const char *format, ...) _util_printf_format(1,2); >>> >>> Hmm I moved the include further up with commit >>> 373f118c6c750d717fd0727fc3fc191828714c6f although that should not have >>> made any difference, barring fragile include file order. Can you check >>> if reverting the u_debug.h gets you up and running ? If so can you >>> please: >>> - Please add the stable tag Cc: >>> - Attempt to straighten the includes (it might be mesa, llvm and/or >>> Haiku that is getting confused) >>> It's still true, however without the _util_printf_format I get odd llvm symbol errors. >>> >>> I would suspect that the above is in play, but without details >>> (build/error log) little to no one will be able to tell you if this is >>> the correct fix, I'm afraid. 
>> >> gcc 5.4.0 / llvm 3.8.0 >> Sorry, I wasn't near the machine, here is the error without any changes: >> >> src/gallium/auxiliary/gallivm/lp_bld_assert.c: In function 'lp_assert': >> src/gallium/auxiliary/gallivm/lp_bld_assert.c:43:7: warning: implicit >> declaration of function >> 'debug_printf' [-Wimplicit-function-declaration] >> debug_printf("LLVM assertion '%s' failed!\n", msg); >> ^ > > Ok, this happens as PIPE_OS_HAIKU isn't defined that early in > u_debug.h, thus the header is not included > >> Compiling src/gallium/auxiliary/gallivm/lp_bld_const.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_conv.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_debug.cpp ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_flow.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_aos.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_cached.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_float.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_format.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_soa.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_gather.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_init.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_intr.c ... >> src/gallium/auxiliary/gallivm/lp_bld_intr.c: In function >> 'lp_build_intrinsic_binary_anylength': >> src/gallium/auxiliary/gallivm/lp_bld_intr.c:252:10: warning: implicit >> declaration of function >> 'debug_printf' [-Wimplicit-function-declaration] >> debug_printf("%s: should handle arbitrary vector size\n", >> ^ >> Compiling src/gallium/auxiliary/gallivm/lp_bld_logic.c ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_misc.cpp ... >> Compiling src/gallium/auxiliary/gallivm/lp_bld_pack.c ... 
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_printf.c ... >> src/gallium/auxiliary/gallivm/lp_bld_printf.c: In function >> 'lp_build_print_args': >> src/gallium/auxiliary/gallivm/lp_bld_printf.c:68:84: error: 'debug_printf' >> undeclared (first use in >> this function) >> func_printf = lp_build_const_int_pointer(gallivm, >> func_to_pointer((func_pointer)debug_printf)); >> ^ >> src/gallium/auxiliary/gallivm/lp_bld_printf.c:68:84: note: each undeclared >> identifier is reported >> only once for each function it appears in >> scons: *** >> [build/haiku-x86_64-debug/gallium/auxiliary/gallivm/lp_bld_printf.os] Error 1 >> >> debug_printf is definitely declared however (and it should be all c code, no >> C++ thus no mangling) >> The linux code just below defines debug_printf twice as well: https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/auxiliary/util/
[Mesa-dev] [PATCH] i965: Use tex_mocs instead of rb_mocs for GL images.
Fixes a 10-20% performance regression in OglCSDof caused by commit 5a8c89038abab0184ea72664ab390ec6ca58b4d6, which made images (in the image load/store sense) use BDW_MOCS_PTE instead of BDW_MOCS_WB. This seems sketchy, as the default PTE value is supposed to be WB LLC eLLC, which is the same as our MOCS WB setting. It's only supposed to change when using a surface for display, which won't ever happen for images. Something may be wrong in the kernel... Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index d896789..87f8601 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1516,7 +1516,7 @@ update_image_surface(struct brw_context *brw, const int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; brw_emit_surface_state(brw, mt, &view, - surface_state_infos[brw->gen].rb_mocs, false, + surface_state_infos[brw->gen].tex_mocs, false, surf_offset, surf_index, I915_GEM_DOMAIN_SAMPLER, access == GL_READ_ONLY ? 0 : -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 8/8] anv: Properly call gen75_emit_state_base_address on Haswell.
Also, feel free to cc the whole lot to stable since it really is a gen7 fix. I guess you could leave 6-7 off stable if you want but Meh. On Jul 18, 2016 6:33 PM, "Jason Ekstrand" wrote: > Series is > > Reviewed-by: Jason Ekstrand > > Thanks for cleaning this up. The more we can share code the better I say. > > On Jul 18, 2016 6:06 PM, "Kenneth Graunke" wrote: > >> This should fix MOCS values. Caught by Coverity. >> >> CID: 1364155 >> >> Signed-off-by: Kenneth Graunke >> --- >> src/intel/vulkan/anv_cmd_buffer.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/src/intel/vulkan/anv_cmd_buffer.c >> b/src/intel/vulkan/anv_cmd_buffer.c >> index 6256df8..380260a 100644 >> --- a/src/intel/vulkan/anv_cmd_buffer.c >> +++ b/src/intel/vulkan/anv_cmd_buffer.c >> @@ -359,7 +359,7 @@ anv_cmd_buffer_emit_state_base_address(struct >> anv_cmd_buffer *cmd_buffer) >> switch (cmd_buffer->device->info.gen) { >> case 7: >>if (cmd_buffer->device->info.is_haswell) >> - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); >> + return gen75_cmd_buffer_emit_state_base_address(cmd_buffer); >>else >> return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); >> case 8: >> -- >> 2.9.0 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 8/8] anv: Properly call gen75_emit_state_base_address on Haswell.
Series is Reviewed-by: Jason Ekstrand Thanks for cleaning this up. The more we can share code the better I say. On Jul 18, 2016 6:06 PM, "Kenneth Graunke" wrote: > This should fix MOCS values. Caught by Coverity. > > CID: 1364155 > > Signed-off-by: Kenneth Graunke > --- > src/intel/vulkan/anv_cmd_buffer.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > b/src/intel/vulkan/anv_cmd_buffer.c > index 6256df8..380260a 100644 > --- a/src/intel/vulkan/anv_cmd_buffer.c > +++ b/src/intel/vulkan/anv_cmd_buffer.c > @@ -359,7 +359,7 @@ anv_cmd_buffer_emit_state_base_address(struct > anv_cmd_buffer *cmd_buffer) > switch (cmd_buffer->device->info.gen) { > case 7: >if (cmd_buffer->device->info.is_haswell) > - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); > + return gen75_cmd_buffer_emit_state_base_address(cmd_buffer); >else > return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); > case 8: > -- > 2.9.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Required Mako version? (WAS: mesa from git fails to compile)
Any conclusion or fix for this issue? On Saturday 16 July 2016 02:52:50 Jason Ekstrand wrote: > Adding Dylan > > On Jul 14, 2016 10:24 PM, "Samuel Iglesias Gonsálvez" > > > wrote: > > On 14/07/16 18:34, Eric Engestrom wrote: > > > On Thu, Jul 14, 2016 at 04:01:13PM +0100, Eric Engestrom wrote: > > >> Oh right, there's already check for the Mako version, but the > > >> minimum is currently set to 0.3.4 (configure.ac:92). > > >> > > >> Emil, you were the one to mention 0.8.0; is that the actual > > >> minimum, or just a known working version? > > > > > > OK, so I did a bit of digging, and the version check was > > > introduced by Samuel Iglesias Gonsalvez a couple years ago > > > (2b37bea0) at 0.7.3, and he later lowered it to 0.3.4 > > > (6d43a4c3), but I can't find any discussion regarding this > > > change: it seems there was none on the mailing list [0]. > > > > > > Adding Samuel so he can enlighten us :) > > > > > > [0] > > > > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074366 > > .html > > > > > > There was a discussion in the mailing list. Just after I pushed > > this patch to master [0] setting it to 0.7.3 (because that was the > > version I had back then), Dave Airlie mentioned that RHEL6 only > > ships mako 0.3.4 [1] and asked if we really need a later version > > or not. We did some tests [2][3] and finally this patch [4] was > > pushed upstream. > > > > I don't know if we need some feature from mako 0.8.0 to generate > > isl_format_layout because this file was added later than my change, > > probably Emil knows it. 
> > > > Sam > > > > [0] > > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074000 > > .html [1] > > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074283 > > .html [2] > > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074287 > > .html [3] > > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074332 > > .html [4] > > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074366 > > .html > > > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev -- Pali Rohár pali.ro...@gmail.com signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Switch OpenMAX state tracker in Mesa/Gallium to use Tizonia
I am interested in this project idea. I want some help regarding it. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 8/8] anv: Properly call gen75_emit_state_base_address on Haswell.
This should fix MOCS values. Caught by Coverity. CID: 1364155 Signed-off-by: Kenneth Graunke --- src/intel/vulkan/anv_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 6256df8..380260a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -359,7 +359,7 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) switch (cmd_buffer->device->info.gen) { case 7: if (cmd_buffer->device->info.is_haswell) - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + return gen75_cmd_buffer_emit_state_base_address(cmd_buffer); else return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); case 8: -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/8] anv: Unify 3DSTATE_CLIP code across generations.
The bulk of this is the same. There are just a couple fields that only exist on one generation or another, and we can easily handle those with an #ifdef. Signed-off-by: Kenneth Graunke --- src/intel/vulkan/gen7_pipeline.c | 23 ++ src/intel/vulkan/gen8_pipeline.c | 25 +++- src/intel/vulkan/genX_pipeline_util.h | 36 +++ 3 files changed, 41 insertions(+), 43 deletions(-) diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index a9f5e0b..8ce50be 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -117,27 +117,8 @@ genX(graphics_pipeline_create)( emit_urb_setup(pipeline); - const VkPipelineRasterizationStateCreateInfo *rs_info = - pCreateInfo->pRasterizationState; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) { - clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace], - clip.EarlyCullEnable = true, - clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode], - clip.ClipEnable = !(extra && extra->use_rectlist), - clip.APIMode = APIMODE_D3D, - clip.ViewportXYClipTestEnable = true, - clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable, - clip.ClipMode = CLIPMODE_NORMAL, - - clip.TriangleStripListProvokingVertexSelect = 0, - clip.LineStripListProvokingVertexSelect = 0, - clip.TriangleFanProvokingVertexSelect = 1, - - clip.MinimumPointWidth= 0.125, - clip.MaximumPointWidth= 255.875, - clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1; - } + emit_3dstate_clip(pipeline, pCreateInfo->pViewportState, + pCreateInfo->pRasterizationState, extra); if (pCreateInfo->pMultisampleState && pCreateInfo->pMultisampleState->rasterizationSamples > 1) diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 52792a9..cc10d3a 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -186,29 +186,10 @@ genX(graphics_pipeline_create)( emit_urb_setup(pipeline); - const struct brw_wm_prog_data *wm_prog_data = 
get_wm_prog_data(pipeline); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) { - clip.ClipEnable = !(extra && extra->use_rectlist); - clip.EarlyCullEnable = true; - clip.APIMode = APIMODE_D3D; - clip.ViewportXYClipTestEnable = true; - - clip.ClipMode = - pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? - CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL; - - clip.NonPerspectiveBarycentricEnable = wm_prog_data ? - (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0; - - clip.TriangleStripListProvokingVertexSelect = 0; - clip.LineStripListProvokingVertexSelect = 0; - clip.TriangleFanProvokingVertexSelect= 1; - - clip.MinimumPointWidth = 0.125; - clip.MaximumPointWidth = 255.875; - clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1; - } + emit_3dstate_clip(pipeline, pCreateInfo->pViewportState, + pCreateInfo->pRasterizationState, extra); + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) { wm.StatisticsEnable= true; wm.LineEndCapAntialiasingRegionWidth = _05pixels; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 4385112..52263df 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -646,3 +646,39 @@ emit_cb_state(struct anv_pipeline *pipeline, #endif } } + +static void +emit_3dstate_clip(struct anv_pipeline *pipeline, + const VkPipelineViewportStateCreateInfo *vp_info, + const VkPipelineRasterizationStateCreateInfo *rs_info, + const struct anv_graphics_pipeline_create_info *extra) +{ + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); + (void) wm_prog_data; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) { + clip.ClipEnable = !(extra && extra->use_rectlist); + clip.EarlyCullEnable = true; + clip.APIMode = APIMODE_D3D, + clip.ViewportXYClipTestEnable = true; + + clip.ClipMode = rs_info->rasterizerDiscardEnable ? 
+ CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL; + + clip.TriangleStripListProvokingVertexSelect = 0; + clip.LineStripListProvokingVertexSelect = 0; + clip.TriangleFanProvokingVertexSelect = 1; + + clip.MinimumPointWidth = 0.125; + clip.MaximumPointWidth = 255.875; + clip.MaximumVPIndex= vp_info->viewportCou
[Mesa-dev] [PATCH 2/8] genxml: Add APIMODE_D3D missing enum values and improve consistency.
Signed-off-by: Kenneth Graunke --- src/intel/genxml/gen6.xml| 1 + src/intel/genxml/gen7.xml| 1 + src/intel/genxml/gen75.xml | 1 + src/intel/genxml/gen8.xml| 3 ++- src/intel/genxml/gen9.xml| 3 ++- src/intel/vulkan/gen8_pipeline.c | 2 +- 6 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/intel/genxml/gen6.xml b/src/intel/genxml/gen6.xml index c465288..cf94efc 100644 --- a/src/intel/genxml/gen6.xml +++ b/src/intel/genxml/gen6.xml @@ -787,6 +787,7 @@ + diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml index b1c324f..1084093 100644 --- a/src/intel/genxml/gen7.xml +++ b/src/intel/genxml/gen7.xml @@ -959,6 +959,7 @@ + diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml index 1239164..b7bf13a 100644 --- a/src/intel/genxml/gen75.xml +++ b/src/intel/genxml/gen75.xml @@ -1068,6 +1068,7 @@ + diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index 386e8fc..dfeda94 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -1115,7 +1115,8 @@ - + + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 896143b..06a3cd4 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -1167,7 +1167,8 @@ - + + diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 4e04aad..52792a9 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -190,7 +190,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) { clip.ClipEnable = !(extra && extra->use_rectlist); clip.EarlyCullEnable = true; - clip.APIMode = 1; /* D3D */ + clip.APIMode = APIMODE_D3D; clip.ViewportXYClipTestEnable = true; clip.ClipMode = -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/8] anv: Perform rasterizer discard in the SOL stage instead of the clipper.
See commit b0629e6894513a2c49a018bc3342a4e55435a236, where we discovered that the SOL stage's "Rendering Disable" feature is a lot faster at throwing away all geometry than the clipper's "reject all" mode. Signed-off-by: Kenneth Graunke --- src/intel/vulkan/gen7_pipeline.c | 1 + src/intel/vulkan/gen8_pipeline.c | 1 + src/intel/vulkan/genX_pipeline_util.h | 12 ++-- src/intel/vulkan/genX_state.c | 1 - 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 8ce50be..6acdd85 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -119,6 +119,7 @@ genX(graphics_pipeline_create)( emit_3dstate_clip(pipeline, pCreateInfo->pViewportState, pCreateInfo->pRasterizationState, extra); + emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState); if (pCreateInfo->pMultisampleState && pCreateInfo->pMultisampleState->rasterizationSamples > 1) diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index cc10d3a..0010955 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -188,6 +188,7 @@ genX(graphics_pipeline_create)( emit_3dstate_clip(pipeline, pCreateInfo->pViewportState, pCreateInfo->pRasterizationState, extra); + emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState); const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) { diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 52263df..3a545a0 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -661,8 +661,7 @@ emit_3dstate_clip(struct anv_pipeline *pipeline, clip.APIMode = APIMODE_D3D, clip.ViewportXYClipTestEnable = true; - clip.ClipMode = rs_info->rasterizerDiscardEnable ? 
- CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL; + clip.ClipMode = CLIPMODE_NORMAL; clip.TriangleStripListProvokingVertexSelect = 0; clip.LineStripListProvokingVertexSelect = 0; @@ -682,3 +681,12 @@ emit_3dstate_clip(struct anv_pipeline *pipeline, #endif } } + +static void +emit_3dstate_streamout(struct anv_pipeline *pipeline, + const VkPipelineRasterizationStateCreateInfo *rs_info) +{ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), so) { + so.RenderingDisable = rs_info->rasterizerDiscardEnable; + } +} diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index f67b0a7..8253a8b 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -58,7 +58,6 @@ genX(init_device_state)(struct anv_device *device) anv_batch_emit(&batch, GENX(3DSTATE_TE), ts); anv_batch_emit(&batch, GENX(3DSTATE_DS), ds); - anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), so); anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS), aa); anv_batch_emit(&batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/8] genxml: Add CLIPMODE_* prefix to 3DSTATE_CLIP's "Clip Mode" enum values.
Gen6-7.5 use CLIPMODE_REJECT_ALL, while Gen8+ just used REJECT_ALL. Being consistent will let me unify code, and I prefer having the prefix. Signed-off-by: Kenneth Graunke --- src/intel/genxml/gen8.xml| 6 +++--- src/intel/genxml/gen9.xml| 6 +++--- src/intel/vulkan/gen8_pipeline.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index 97af191..386e8fc 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -1121,9 +1121,9 @@ - - - + + + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 5e3e2e1..896143b 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -1173,9 +1173,9 @@ - - - + + + diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 4908bbd..4e04aad 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -195,7 +195,7 @@ genX(graphics_pipeline_create)( clip.ClipMode = pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? - REJECT_ALL : NORMAL; + CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL; clip.NonPerspectiveBarycentricEnable = wm_prog_data ? (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0; -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/8] anv: Enable early culling on Gen7.
We set the cull mode, but forgot the enable bit. Gen8 uses this. Signed-off-by: Kenneth Graunke --- src/intel/vulkan/gen7_pipeline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index a50d9c7..a9f5e0b 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -122,6 +122,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) { clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace], + clip.EarlyCullEnable = true, clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode], clip.ClipEnable = !(extra && extra->use_rectlist), clip.APIMode = APIMODE_D3D, -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/8] anv: Fix near plane clipping on Gen7/7.5.
The Gen7/7.5 clip code used APIMODE_OGL, while the Gen8+ clip code used APIMODE_D3D. The meaning hasn't changed, so one of these must be wrong. It appears that the hardware documentation is completely wrong. It claims that the "API Mode" bit means: 0h = APIMODE_OGL: NEAR_VP boundary == 0.0 (NDC); 1h = APIMODE_D3D: NEAR_VP boundary == -1.0 (NDC). However, DirectX typically uses 0.0 for the near plane, while unextended OpenGL uses -1.0. i965's gen6_clip_state.c uses APIMODE_D3D for the GL_ZERO_TO_ONE case, so I believe the meanings are backwards from what the documentation says. Section 23.2 ("Primitive Clipping") of the Vulkan 1.0.21 specification contains the following equations: -w_c <= x_c <= w_c; -w_c <= y_c <= w_c; 0 <= z_c <= w_c. This means that Vulkan follows D3D semantics. Signed-off-by: Kenneth Graunke --- src/intel/vulkan/gen7_pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 01c04f3..a50d9c7 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -124,7 +124,7 @@ genX(graphics_pipeline_create)( clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace], clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode], clip.ClipEnable = !(extra && extra->use_rectlist), - clip.APIMode = APIMODE_OGL, + clip.APIMode = APIMODE_D3D, clip.ViewportXYClipTestEnable = true, clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable, clip.ClipMode = CLIPMODE_NORMAL, -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/8] genxml: Rename "API Rendering Disable" to "Rendering Disable".
Gen7/7.5 call it "Rendering Disable" while Gen8/9 prefix it with "API". Pick one for consistency, and so we can share code between generations. Signed-off-by: Kenneth Graunke --- src/intel/genxml/gen8.xml | 2 +- src/intel/genxml/gen9.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index dfeda94..8145ddd 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -2036,7 +2036,7 @@ - + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 06a3cd4..1838d2c 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -2239,7 +2239,7 @@ - + -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965: Fix shared atomic intrinsics to pay attention to base.
On Mon, 2016-07-18 at 15:49 -0700, Kenneth Graunke wrote: So this fixes a bug with indirects right? Is there a piglit test for this? With the typo Ilia pointed out fixed, both are: Reviewed-by: Timothy Arceri > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 15 +-- > 1 file changed, 13 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 6265dc6..a39c37e 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -4177,13 +4177,24 @@ fs_visitor::nir_emit_shared_atomic(const > fs_builder &bld, > dest = get_nir_dest(instr->dest); > > fs_reg surface = brw_imm_ud(GEN7_BTI_SLM); > - fs_reg offset = get_nir_src(instr->src[0]); > + fs_reg offset; > fs_reg data1 = get_nir_src(instr->src[1]); > fs_reg data2; > if (op == BRW_AOP_CMPWR) > data2 = get_nir_src(instr->src[2]); > > - /* Emit the actual atomic operation operation */ > + /* Get the offset */ > + nir_const_value *const_offset = nir_src_as_const_value(instr- > >src[0]); > + if (const_offset) { > + offset = brw_imm_ud(instr->const_index[0] + const_offset- > >u32[0]); > + } else { > + offset = vgrf(glsl_type::uint_type); > + bld.ADD(offset, > + retype(get_nir_src(instr->src[0]), > BRW_REGISTER_TYPE_UD), > + brw_imm_ud(instr->const_index[0])); > + } > + > + /* Emit the actua atomic operation operation */ > > fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset, > data1, data2, ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965: Fix shared atomic intrinsics to pay attention to base.
On Mon, 2016-07-18 at 15:49 -0700, Kenneth Graunke wrote: So this fixes a bug with indirects right? Is there a piglit test for this? With the typo Ilia pointed out fixed. Reviewed-by: Timothy Arceri > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 15 +-- > 1 file changed, 13 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 6265dc6..a39c37e 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -4177,13 +4177,24 @@ fs_visitor::nir_emit_shared_atomic(const > fs_builder &bld, > dest = get_nir_dest(instr->dest); > > fs_reg surface = brw_imm_ud(GEN7_BTI_SLM); > - fs_reg offset = get_nir_src(instr->src[0]); > + fs_reg offset; > fs_reg data1 = get_nir_src(instr->src[1]); > fs_reg data2; > if (op == BRW_AOP_CMPWR) > data2 = get_nir_src(instr->src[2]); > > - /* Emit the actual atomic operation operation */ > + /* Get the offset */ > + nir_const_value *const_offset = nir_src_as_const_value(instr- > >src[0]); > + if (const_offset) { > + offset = brw_imm_ud(instr->const_index[0] + const_offset- > >u32[0]); > + } else { > + offset = vgrf(glsl_type::uint_type); > + bld.ADD(offset, > + retype(get_nir_src(instr->src[0]), > BRW_REGISTER_TYPE_UD), > + brw_imm_ud(instr->const_index[0])); > + } > + > + /* Emit the actua atomic operation operation */ > > fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset, > data1, data2, ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/11] vl/util: add copy func for yv12image to nv12surface
Zhang, Boyuan wrote: Hi Andy, I just submitted another patch set, most of the issues you reported are solved, please see the information below: - Giving different frame rate should result different output size. The final result from my side is very close to the CBR I set. Please give a try with different frame rate and bit rate. - Picture corruption (half height pic) is caused by interlaced setting. Interlace encoding is not supported. However, for transcoding case, VAAPI decode will use interlace mode, which will cause this issue. The temp solution is to use an Environmental Variable to disable interlace when doing transcoding. Please try the following command with the new patch: DISABLE_INTERLACE=true gst-launch-1.0 filesrc location=~/big_buck_bunny_720p_1mb.mp4 ! qtdemux ! h264parse ! vaapidecode ! vaapih264enc ! filesink location=out.264 - I420 yuv -> nv12 case seems working fine on my side, can you please provide the testing raw file and command you were using? I want to reproduce the issue from my side and try to fix it if possible. Thanks a lot! Will try new patches tomorrow. Here's a few frames of I420 in mkv at 1 fps, should play directly OK with mplayer/mpv - building is yellow. https://drive.google.com/file/d/0BxP5-S1t9VEEc3RhNzBQclhlNWc/view?usp=sharing Of course google will make a preview but you should be able to download the raw file by moving mouse towards the top of the screen. Do gst-launch-1.0 -f filesrc location=I420-5f.mkv ! matroskademux ! vaapih264enc ! h264parse ! mp4mux ! filesink location=out-I420-1.mp4 and the result is blue building, force gstreamer to convert to nv12 result is OK. gst-launch-1.0 -f filesrc location=~/I420-5f.mkv ! matroskademux ! videoconvert ! video/x-raw,format=NV12 ! vaapih264enc ! h264parse ! mp4mux ! filesink location=out-I420-2.mp4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC] gallium/u_queue: add barrier function
possibly.. although sprinkling queue_barrier() calls (which is at least useful for debugging, although I think I won't use it in the end after debugging) hasn't found the issue yet. I did at least find an issue w/ fence handling (I was grabbing the fence # potentially before the batch was flushed), but that also doesn't seem to be the issue I am seeing. The idea of having a ring of N fences (where N is given by max_jobs-1), rather than embedding the fence in the refcnt'd batch, is interesting, and sounds like it might solve some problems. I may end up doing that.. BR, -R On Mon, Jul 18, 2016 at 7:34 PM, Marek Olšák wrote: > I think your issue is that you have self-releasing jobs with the > cleanup callback and you automatically lose fences that way, so there > is no way to wait for completion. > > Since you have only 1 thread with N jobs at most, I suggest you keep > N+1 fences around (a ring of fences) that you reuse for new jobs and > keep a pointer to the most-recently-used fence. That way you know > which fence you need to wait on to make the whole queue idle. > > Marek > > On Mon, Jul 18, 2016 at 10:25 PM, Rob Clark wrote: >> Helper to block until all previous jobs are complete. >> --- >> So I think this might end up being useful to me in some cases.. but >> the implementation only works for a single threaded queue (which is >> all I need). I could also just put a helper in my driver code. >> >> Opinions? 
>> >> src/gallium/auxiliary/util/u_queue.c | 12 >> src/gallium/auxiliary/util/u_queue.h | 2 ++ >> 2 files changed, 14 insertions(+) >> >> diff --git a/src/gallium/auxiliary/util/u_queue.c >> b/src/gallium/auxiliary/util/u_queue.c >> index 838464f..861faca 100644 >> --- a/src/gallium/auxiliary/util/u_queue.c >> +++ b/src/gallium/auxiliary/util/u_queue.c >> @@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue, >> pipe_condvar_signal(queue->has_queued_cond); >> pipe_mutex_unlock(queue->lock); >> } >> + >> +static void dummy_execute(void *job, int thread_index) {} >> + >> +/* blocks until all previously queued jobs complete: */ >> +void util_queue_barrier(struct util_queue *queue) >> +{ >> + struct util_queue_fence fence; >> + util_queue_fence_init(&fence); >> + util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, NULL); >> + util_queue_job_wait(&fence); >> + util_queue_fence_destroy(&fence); >> +} >> diff --git a/src/gallium/auxiliary/util/u_queue.h >> b/src/gallium/auxiliary/util/u_queue.h >> index 59646cc..8a22ee0 100644 >> --- a/src/gallium/auxiliary/util/u_queue.h >> +++ b/src/gallium/auxiliary/util/u_queue.h >> @@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue, >> >> void util_queue_job_wait(struct util_queue_fence *fence); >> >> +void util_queue_barrier(struct util_queue *queue); >> + >> /* util_queue needs to be cleared to zeroes for this to work */ >> static inline bool >> util_queue_is_initialized(struct util_queue *queue) >> -- >> 2.7.4 >> ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Skip update_texture_surface when the plane doesn't exist
On Mon, Jul 18, 2016 at 9:50 AM, Jordan Justen wrote: > On 2016-07-18 09:14:48, Jason Ekstrand wrote: > > Thanks to rebase fail, recent surface state changes effectively reverted > > This happened in 09b5a71517fadd6c20b72e7ad9ea1f7539c93a42, right? > Should we mention that commit? > Done > Reviewed-by: Jordan Justen > Thanks! > > > 727a9b24933 and 367cf3a2e3e which was unintentional. This should bring > it > > back. > > > > Signed-off-by: Jason Ekstrand > > Cc: Jordan Justen > > --- > > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 18 ++ > > 1 file changed, 10 insertions(+), 8 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > > index c1a7579..d896789 100644 > > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > > @@ -385,6 +385,13 @@ brw_update_texture_surface(struct gl_context *ctx, > > } else { > >struct intel_texture_object *intel_obj = > intel_texture_object(obj); > >struct intel_mipmap_tree *mt = intel_obj->mt; > > + > > + if (plane > 0) { > > + if (mt->plane[plane - 1] == NULL) > > +return; > > + mt = mt->plane[plane - 1]; > > + } > > + > >struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, > unit); > >/* If this is a view with restricted NumLayers, then our > effective depth > > * is not just the miptree depth. > > @@ -406,8 +413,9 @@ brw_update_texture_surface(struct gl_context *ctx, > >const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW : > > brw_get_texture_swizzle(&brw->ctx, > obj)); > > > > - unsigned format = translate_tex_format( > > - brw, intel_obj->_Format, sampler->sRGBDecode); > > + mesa_format mesa_fmt = plane == 0 ? 
intel_obj->_Format : > mt->format; > > + unsigned format = translate_tex_format(brw, mesa_fmt, > > + sampler->sRGBDecode); > > > >/* Implement gen6 and gen7 gather work-around */ > >bool need_green_to_blue = false; > > @@ -449,12 +457,6 @@ brw_update_texture_surface(struct gl_context *ctx, > > assert(brw->gen >= 8); > > mt = mt->stencil_mt; > > format = BRW_SURFACEFORMAT_R8_UINT; > > - } else if (obj->Target == GL_TEXTURE_EXTERNAL_OES) { > > - if (plane > 0) > > -mt = mt->plane[plane - 1]; > > - if (mt == NULL) > > -return; > > - format = translate_tex_format(brw, mt->format, > sampler->sRGBDecode); > >} > > > >const int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; > > -- > > 2.5.0.400.gff86faf > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC] gallium/u_queue: add barrier function
I think your issue is that you have self-releasing jobs with the cleanup callback and you automatically lose fences that way, so there is no way to wait for completion. Since you have only 1 thread with N jobs at most, I suggest you keep N+1 fences around (a ring of fences) that you reuse for new jobs and keep a pointer to the most-recently-used fence. That way you know which fence you need to wait on to make the whole queue idle. Marek On Mon, Jul 18, 2016 at 10:25 PM, Rob Clark wrote: > Helper to block until all previous jobs are complete. > --- > So I think this might end up being useful to me in some cases.. but > the implementation only works for a single threaded queue (which is > all I need). I could also just put a helper in my driver code. > > Opinions? > > src/gallium/auxiliary/util/u_queue.c | 12 > src/gallium/auxiliary/util/u_queue.h | 2 ++ > 2 files changed, 14 insertions(+) > > diff --git a/src/gallium/auxiliary/util/u_queue.c > b/src/gallium/auxiliary/util/u_queue.c > index 838464f..861faca 100644 > --- a/src/gallium/auxiliary/util/u_queue.c > +++ b/src/gallium/auxiliary/util/u_queue.c > @@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue, > pipe_condvar_signal(queue->has_queued_cond); > pipe_mutex_unlock(queue->lock); > } > + > +static void dummy_execute(void *job, int thread_index) {} > + > +/* blocks until all previously queued jobs complete: */ > +void util_queue_barrier(struct util_queue *queue) > +{ > + struct util_queue_fence fence; > + util_queue_fence_init(&fence); > + util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, NULL); > + util_queue_job_wait(&fence); > + util_queue_fence_destroy(&fence); > +} > diff --git a/src/gallium/auxiliary/util/u_queue.h > b/src/gallium/auxiliary/util/u_queue.h > index 59646cc..8a22ee0 100644 > --- a/src/gallium/auxiliary/util/u_queue.h > +++ b/src/gallium/auxiliary/util/u_queue.h > @@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue, > > void 
util_queue_job_wait(struct util_queue_fence *fence); > > +void util_queue_barrier(struct util_queue *queue); > + > /* util_queue needs to be cleared to zeroes for this to work */ > static inline bool > util_queue_is_initialized(struct util_queue *queue) > -- > 2.7.4 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96979] Mesa 10.5.7 implementation error: Trying to disable permanently enabled extensions
https://bugs.freedesktop.org/show_bug.cgi?id=96979 --- Comment #3 from Ilia Mirkin --- (In reply to Kenneth Graunke from comment #2) > This isn't something we want to support. > > We should probably use fprintf rather than _mesa_problem so it doesn't print > "Mesa: implementation error" as it isn't an implementation issue - > it's a warning to the user that the debug options they requested won't take > effect because it's unsupported. Actually the issue is that glxinfo hits a segfault later. I guess glGetStringi() or GL_NUM_EXTENSIONS gets confused... As I recall, at some point Brian explicitly added support for disabling always-on exts. Not sure if that work was already in 10.5 or not... def worth checking a more recent version of mesa -- 10.5 is long out of support. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/11] vl/util: add copy func for yv12image to nv12surface
Hi Andy, I just submitted another patch set, most of the issues you reported are solved, please see the information below: - Giving different frame rate should result different output size. The final result from my side is very close to the CBR I set. Please give a try with different frame rate and bit rate. - Picture corruption (half height pic) is caused by interlaced setting. Interlace encoding is not supported. However, for transcoding case, VAAPI decode will use interlace mode, which will cause this issue. The temp solution is to use an Environmental Variable to disable interlace when doing transcoding. Please try the following command with the new patch: DISABLE_INTERLACE=true gst-launch-1.0 filesrc location=~/big_buck_bunny_720p_1mb.mp4 ! qtdemux ! h264parse ! vaapidecode ! vaapih264enc ! filesink location=out.264 - I420 yuv -> nv12 case seems working fine on my side, can you please provide the testing raw file and command you were using? I want to reproduce the issue from my side and try to fix it if possible. Thanks a lot! Hi Christian, Besides fixing those issue listed above, I also modified the code based on your suggestions, e.g. adding mutex lock/unlock. Please take a look at the new patch set, and feel free to give any suggestions/comments. Thanks! Regards, Boyuan -Original Message- From: Christian König [mailto:deathsim...@vodafone.de] Sent: July-18-16 10:15 AM To: Zhang, Boyuan; mesa-dev@lists.freedesktop.org Cc: adf.li...@gmail.com Subject: Re: [PATCH 06/11] vl/util: add copy func for yv12image to nv12surface Am 16.07.2016 um 00:41 schrieb Boyuan Zhang: > Add function to copy from yv12 image to nv12 surface for VAAPI putimage call. > We need this function in VaPutImage call where copying from yv12 image to > nv12 surface for encoding. Existing function can't be used because it only > work for copying from yv12 surface to nv12 image in Vaapi. 
I think we can keep the patches mostly as they are now, but I would like to get a bit more positive feedback from Andy and maybe others. E.g. at least we should be able to encode something without crashing on Tonga and other hardware generations as well before we push it upstream. Regards, Christian. > > Signed-off-by: Boyuan Zhang > mailto:boyuan.zh...@amd.com>> > --- > src/gallium/auxiliary/util/u_video.h | 23 +++ > 1 file changed, 23 insertions(+) > > diff --git a/src/gallium/auxiliary/util/u_video.h > b/src/gallium/auxiliary/util/u_video.h > index 9196afc..d147295 100644 > --- a/src/gallium/auxiliary/util/u_video.h > +++ b/src/gallium/auxiliary/util/u_video.h > @@ -130,6 +130,29 @@ u_copy_yv12_to_nv12(void *const *destination_data, > } > > static inline void > +u_copy_yv12_img_to_nv12_surf(uint8_t *const *src, > + uint8_t *dest, > + int *offset, > + int field) { > + if (field == 0) { > + for (int i = 0; i < offset[1] ; i++) > + dest[i] = src[field][i]; > + } else if (field == 1) { > + bool odd = false; > + for (int i = 0; i < (offset[1]/2) ; i++){ > + if (odd == false) { > +dest[i] = src[field][i/2]; > +odd = true; > + } else { > +dest[i] = src[field+1][i/2]; > +odd = false; > + } > + } > + } > +} > + > +static inline void > u_copy_swap422_packed(void *const *destination_data, > uint32_t const *destination_pitches, > int src_plane, int src_field, ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965: Fix shared atomic intrinsics to pay attention to base.
On Mon, Jul 18, 2016 at 6:49 PM, Kenneth Graunke wrote: > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 15 +-- > 1 file changed, 13 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 6265dc6..a39c37e 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -4177,13 +4177,24 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder > &bld, >dest = get_nir_dest(instr->dest); > > fs_reg surface = brw_imm_ud(GEN7_BTI_SLM); > - fs_reg offset = get_nir_src(instr->src[0]); > + fs_reg offset; > fs_reg data1 = get_nir_src(instr->src[1]); > fs_reg data2; > if (op == BRW_AOP_CMPWR) >data2 = get_nir_src(instr->src[2]); > > - /* Emit the actual atomic operation operation */ > + /* Get the offset */ > + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); > + if (const_offset) { > + offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); > + } else { > + offset = vgrf(glsl_type::uint_type); > + bld.ADD(offset, > + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), > + brw_imm_ud(instr->const_index[0])); > + } > + > + /* Emit the actua atomic operation operation */ An l got lost... > > fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset, >data1, data2, > -- > 2.9.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] nir: Add a base const_index to shared atomic intrinsics.
Commit 52e75dcb8c04c0dde989970c4c587cbe8313f7cf made nir_lower_io start using nir_intrinsic_set_base instead of writing const_index[0] directly. However, those intrinsics apparently don't /have/ a base, so this caused assert failures. However, the old code was happily setting non-existent const_index fields, so it was pretty bogus too. Jason pointed out that load_shared and store_shared have a base, and that the i965 driver uses that field. So presumably atomics should have one as well, so that loads/stores/atomics all refer to variables with consistent addressing. Signed-off-by: Kenneth Graunke --- src/compiler/nir/nir_intrinsics.h | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 2f74555..b2ed67c 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -266,16 +266,16 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx, xx, xx, *in shared_atomic_add, etc). * 2: For CompSwap only: the second data parameter. 
*/ -INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 1, BASE, xx, xx, 0) #define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \ INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \ -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] i965: Fix shared atomic intrinsics to pay attention to base.
Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 15 +-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 6265dc6..a39c37e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -4177,13 +4177,24 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, dest = get_nir_dest(instr->dest); fs_reg surface = brw_imm_ud(GEN7_BTI_SLM); - fs_reg offset = get_nir_src(instr->src[0]); + fs_reg offset; fs_reg data1 = get_nir_src(instr->src[1]); fs_reg data2; if (op == BRW_AOP_CMPWR) data2 = get_nir_src(instr->src[2]); - /* Emit the actual atomic operation operation */ + /* Get the offset */ + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); + } else { + offset = vgrf(glsl_type::uint_type); + bld.ADD(offset, + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0])); + } + + /* Emit the actua atomic operation operation */ fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset, data1, data2, -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/12] st/va: add preset values for VAAPI encode
Add some hardcoded values hardware needs mainly for rate control purpose. With previously hardcoded values for OMX, the rate control result is not correct. This change fixed the rate control result by setting correct values for Vaapi. Signed-off-by: Boyuan Zhang --- src/gallium/state_trackers/va/picture.c | 27 +++ 1 file changed, 27 insertions(+) diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c index 4793194..518831f 100644 --- a/src/gallium/state_trackers/va/picture.c +++ b/src/gallium/state_trackers/va/picture.c @@ -95,6 +95,32 @@ vlVaGetReferenceFrame(vlVaDriver *drv, VASurfaceID surface_id, *ref_frame = NULL; } +static void +getEncParamPreset(vlVaContext *context) +{ + //motion estimation preset + context->desc.h264enc.motion_est.motion_est_quarter_pixel = 0x0001; + context->desc.h264enc.motion_est.lsmvert = 0x0002; + context->desc.h264enc.motion_est.enc_disable_sub_mode = 0x0078; + context->desc.h264enc.motion_est.enc_en_ime_overw_dis_subm = 0x0001; + context->desc.h264enc.motion_est.enc_ime_overw_dis_subm_no = 0x0001; + context->desc.h264enc.motion_est.enc_ime2_search_range_x = 0x0004; + context->desc.h264enc.motion_est.enc_ime2_search_range_y = 0x0004; + + //pic control preset + context->desc.h264enc.pic_ctrl.enc_cabac_enable = 0x0001; + context->desc.h264enc.pic_ctrl.enc_constraint_set_flags = 0x0040; + + //rate control + context->desc.h264enc.rate_ctrl.vbv_buffer_size = 2000; + context->desc.h264enc.rate_ctrl.vbv_buf_lv = 48; + context->desc.h264enc.rate_ctrl.fill_data_enable = 1; + context->desc.h264enc.rate_ctrl.enforce_hrd = 1; + context->desc.h264enc.enable_vui = false; + + context->desc.h264enc.ref_pic_mode = 0x0201; +} + static VAStatus handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) { @@ -521,6 +547,7 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id) if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { coded_buf = context->coded_buf; + 
getEncParamPreset(context); context->decoder->begin_frame(context->decoder, context->target, &context->desc.base); context->decoder->encode_bitstream(context->decoder, context->target, coded_buf->derived_surface.resource, &feedback); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/12] st/va: enable h264 VAAPI encode
Enable H.264 VAAPI encoding through config. Currently only H.264 baseline is supported. Signed-off-by: Boyuan Zhang --- src/gallium/state_trackers/va/config.c | 32 ++-- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/gallium/state_trackers/va/config.c b/src/gallium/state_trackers/va/config.c index 6a36fb3..668c89d 100644 --- a/src/gallium/state_trackers/va/config.c +++ b/src/gallium/state_trackers/va/config.c @@ -74,6 +74,7 @@ vlVaQueryConfigEntrypoints(VADriverContextP ctx, VAProfile profile, { struct pipe_screen *pscreen; enum pipe_video_profile p; + int va_status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; @@ -90,12 +91,18 @@ vlVaQueryConfigEntrypoints(VADriverContextP ctx, VAProfile profile, return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; pscreen = VL_VA_PSCREEN(ctx); - if (!pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_SUPPORTED)) - return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; - - entrypoint_list[(*num_entrypoints)++] = VAEntrypointVLD; + if (pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_SUPPORTED)) { + entrypoint_list[(*num_entrypoints)++] = VAEntrypointVLD; + va_status = VA_STATUS_SUCCESS; + } + if (pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_SUPPORTED) && + p == PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE) { + entrypoint_list[(*num_entrypoints)++] = VAEntrypointEncSlice; + entrypoint_list[(*num_entrypoints)++] = VAEntrypointEncPicture; + va_status = VA_STATUS_SUCCESS; + } - return VA_STATUS_SUCCESS; + return va_status; } VAStatus @@ -114,7 +121,7 @@ vlVaGetConfigAttributes(VADriverContextP ctx, VAProfile profile, VAEntrypoint en value = VA_RT_FORMAT_YUV420; break; case VAConfigAttribRateControl: - value = VA_RC_NONE; + value = VA_RC_CQP | VA_RC_CBR; break; default: value = VA_ATTRIB_NOT_SUPPORTED; @@ -161,10 +168,15 @@ vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, VAEntrypoint 
entrypoin return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; pscreen = VL_VA_PSCREEN(ctx); - if (!pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_SUPPORTED)) - return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; - - if (entrypoint != VAEntrypointVLD) + if (entrypoint == VAEntrypointVLD) { + if (!pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_SUPPORTED)) + return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; + } + else if (entrypoint == VAEntrypointEncSlice) { + if (!pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_SUPPORTED)) + return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; + } + else return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; if (entrypoint == VAEntrypointEncSlice || entrypoint == VAEntrypointEncPicture) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/12] st/va: add environmental variable to disable interlace
Add an environment variable to disable interlace mode. At the VAAPI decoding stage, the driver cannot distinguish between the pure decoding case and the transcoding case. Since interlaced encoding is not supported, we have to disable interlace for the transcoding case. The temporary solution is to use an environment variable to disable interlace mode. Signed-off-by: Boyuan Zhang --- src/gallium/state_trackers/va/surface.c | 4 1 file changed, 4 insertions(+) diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c index 8ce4143..d1296b0 100644 --- a/src/gallium/state_trackers/va/surface.c +++ b/src/gallium/state_trackers/va/surface.c @@ -43,6 +43,8 @@ #include "va_private.h" +DEBUG_GET_ONCE_BOOL_OPTION(nointerlace, "DISABLE_INTERLACE", FALSE); + #include static const enum pipe_format vpp_surface_formats[] = { @@ -620,6 +622,8 @@ vlVaCreateSurfaces2(VADriverContextP ctx, unsigned int format, templat.width = width; templat.height = height; + if (debug_get_option_nointerlace()) + templat.interlaced = false; memset(surfaces, VA_INVALID_ID, num_surfaces * sizeof(VASurfaceID)); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/12] st/va: get rate control method from configattrib
Rate control method is passed from app to driver through config attrib list. That is why we need to store this rate control method to config. And later on, we will pass this value to context->desc.h264enc.rate_ctrl.rate_ctrl_method. Signed-off-by: Boyuan Zhang --- src/gallium/state_trackers/va/config.c | 11 +++ src/gallium/state_trackers/va/context.c| 2 ++ src/gallium/state_trackers/va/va_private.h | 1 + 3 files changed, 14 insertions(+) diff --git a/src/gallium/state_trackers/va/config.c b/src/gallium/state_trackers/va/config.c index 7ea7e24..6a36fb3 100644 --- a/src/gallium/state_trackers/va/config.c +++ b/src/gallium/state_trackers/va/config.c @@ -174,6 +174,17 @@ vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoin config->profile = p; + for (int i = 0; i rc = PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT; + else if (attrib_list[i].value == VA_RC_VBR) +config->rc = PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE; + else +config->rc = PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE; + } + } + pipe_mutex_lock(drv->mutex); *config_id = handle_table_add(drv->htab, config); pipe_mutex_unlock(drv->mutex); diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c index 8882cba..65ba7db 100644 --- a/src/gallium/state_trackers/va/context.c +++ b/src/gallium/state_trackers/va/context.c @@ -276,6 +276,8 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID config_id, int picture_width, context->desc.base.profile = config->profile; context->desc.base.entry_point = config->entrypoint; + if (config->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) + context->desc.h264enc.rate_ctrl.rate_ctrl_method = config->rc; pipe_mutex_lock(drv->mutex); *context_id = handle_table_add(drv->htab, context); diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h index 723983d..ad9010a 100644 --- a/src/gallium/state_trackers/va/va_private.h +++ b/src/gallium/state_trackers/va/va_private.h @@ -246,6 
+246,7 @@ typedef struct { typedef struct { VAEntrypoint entrypoint; enum pipe_video_profile profile; + enum pipe_h264_enc_rate_control_method rc; } vlVaConfig; typedef struct { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/12] vl/util: add copy func for yv12image to nv12surface
Add a function to copy from a yv12 image to an nv12 surface for the VAAPI putimage call. We need this function in the VaPutImage call, which copies from a yv12 image to an nv12 surface for encoding. The existing function can't be used because it only works for copying from a yv12 surface to an nv12 image in VAAPI. Signed-off-by: Boyuan Zhang --- src/gallium/auxiliary/util/u_video.h | 23 +++ 1 file changed, 23 insertions(+) diff --git a/src/gallium/auxiliary/util/u_video.h b/src/gallium/auxiliary/util/u_video.h index 9196afc..d147295 100644 --- a/src/gallium/auxiliary/util/u_video.h +++ b/src/gallium/auxiliary/util/u_video.h @@ -130,6 +130,29 @@ u_copy_yv12_to_nv12(void *const *destination_data, } static inline void +u_copy_yv12_img_to_nv12_surf(uint8_t *const *src, + uint8_t *dest, + int *offset, + int field) +{ + if (field == 0) { + for (int i = 0; i < offset[1] ; i++) + dest[i] = src[field][i]; + } else if (field == 1) { + bool odd = false; + for (int i = 0; i < (offset[1]/2) ; i++){ + if (odd == false) { +dest[i] = src[field][i/2]; +odd = true; + } else { +dest[i] = src[field+1][i/2]; +odd = false; + } + } + } +} + +static inline void u_copy_swap422_packed(void *const *destination_data, uint32_t const *destination_pitches, int src_plane, int src_field, -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/12] st/va: add functions for VAAPI encode
Add necessary functions/changes for VAAPI encoding to buffer and picture. These changes will allow driver to handle all Vaapi encode related operations. This patch doesn't change the Vaapi decode behaviour. Signed-off-by: Boyuan Zhang --- src/gallium/state_trackers/va/buffer.c | 6 + src/gallium/state_trackers/va/picture.c| 169 - src/gallium/state_trackers/va/va_private.h | 3 + 3 files changed, 176 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/va/buffer.c b/src/gallium/state_trackers/va/buffer.c index 7d3167b..dfcebbe 100644 --- a/src/gallium/state_trackers/va/buffer.c +++ b/src/gallium/state_trackers/va/buffer.c @@ -133,6 +133,12 @@ vlVaMapBuffer(VADriverContextP ctx, VABufferID buf_id, void **pbuff) if (!buf->derived_surface.transfer || !*pbuff) return VA_STATUS_ERROR_INVALID_BUFFER; + if (buf->type == VAEncCodedBufferType) { + ((VACodedBufferSegment*)buf->data)->buf = *pbuff; + ((VACodedBufferSegment*)buf->data)->size = buf->coded_size; + ((VACodedBufferSegment*)buf->data)->next = NULL; + *pbuff = buf->data; + } } else { pipe_mutex_unlock(drv->mutex); *pbuff = buf->data; diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c index 89ac024..4793194 100644 --- a/src/gallium/state_trackers/va/picture.c +++ b/src/gallium/state_trackers/va/picture.c @@ -78,7 +78,8 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID rende return VA_STATUS_SUCCESS; } - context->decoder->begin_frame(context->decoder, context->target, &context->desc.base); + if (context->decoder->entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE) + context->decoder->begin_frame(context->decoder, context->target, &context->desc.base); return VA_STATUS_SUCCESS; } @@ -278,6 +279,139 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf) num_buffers, (const void * const*)buffers, sizes); } +static VAStatus +handleVAEncMiscParameterTypeRateControl(vlVaContext *context, VAEncMiscParameterBuffer *misc) +{ + 
VAEncMiscParameterRateControl *rc = (VAEncMiscParameterRateControl *)misc->data; + if (context->desc.h264enc.rate_ctrl.rate_ctrl_method == + PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT) + context->desc.h264enc.rate_ctrl.target_bitrate = rc->bits_per_second; + else + context->desc.h264enc.rate_ctrl.target_bitrate = rc->bits_per_second * rc->target_percentage; + context->desc.h264enc.rate_ctrl.peak_bitrate = rc->bits_per_second; + if (context->desc.h264enc.rate_ctrl.target_bitrate < 200) + context->desc.h264enc.rate_ctrl.vbv_buffer_size = MIN2((context->desc.h264enc.rate_ctrl.target_bitrate * 2.75), 200); + else + context->desc.h264enc.rate_ctrl.vbv_buffer_size = context->desc.h264enc.rate_ctrl.target_bitrate; + context->desc.h264enc.rate_ctrl.target_bits_picture = + context->desc.h264enc.rate_ctrl.target_bitrate / context->desc.h264enc.rate_ctrl.frame_rate_num; + context->desc.h264enc.rate_ctrl.peak_bits_picture_integer = + context->desc.h264enc.rate_ctrl.peak_bitrate / context->desc.h264enc.rate_ctrl.frame_rate_num; + context->desc.h264enc.rate_ctrl.peak_bits_picture_fraction = 0; + + return VA_STATUS_SUCCESS; +} + +static VAStatus +handleVAEncSequenceParameterBufferType(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) +{ + VAEncSequenceParameterBufferH264 *h264 = (VAEncSequenceParameterBufferH264 *)buf->data; + if (!context->decoder) { + context->templat.max_references = h264->max_num_ref_frames; + context->templat.level = h264->level_idc; + context->decoder = drv->pipe->create_video_codec(drv->pipe, &context->templat); + if (!context->decoder) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + context->desc.h264enc.gop_size = h264->intra_idr_period; + context->desc.h264enc.rate_ctrl.frame_rate_num = h264->time_scale / 2; + context->desc.h264enc.rate_ctrl.frame_rate_den = 1; + return VA_STATUS_SUCCESS; +} + +static VAStatus +handleVAEncMiscParameterBufferType(vlVaContext *context, vlVaBuffer *buf) +{ + VAStatus vaStatus = VA_STATUS_SUCCESS; + 
VAEncMiscParameterBuffer *misc; + misc = buf->data; + + switch (misc->type) { + case VAEncMiscParameterTypeRateControl: + vaStatus = handleVAEncMiscParameterTypeRateControl(context, misc); + break; + + default: + break; + } + + return vaStatus; +} + +static VAStatus +handleVAEncPictureParameterBufferType(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) +{ + VAEncPictureParameterBufferH264 *h264; + vlVaBuffer *coded_buf; + + h264 = buf->data; + context->desc.h264enc.frame_num = h264->frame_num; + context->desc.h264enc.not_referenced = false; + context->desc.h264enc.is_idr = (h264->pic_fields.bits.idr_pic_flag == 1); + context->desc.h264enc.pic_order_cnt = h264->CurrP
[Mesa-dev] [PATCH 07/12] st/va: add conversion for yv12 to nv12 in putimage
For putimage call, if image format is yv12 (or IYUV with U V field swap) and surface format is nv12, then we need to convert yv12 to nv12 and then copy the converted data from image to surface. We can't use the existing logic where surface is destroyed and re-created with yv12 format. Signed-off-by: Boyuan Zhang --- src/gallium/state_trackers/va/image.c | 33 ++--- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/gallium/state_trackers/va/image.c b/src/gallium/state_trackers/va/image.c index 1b956e3..47895ee 100644 --- a/src/gallium/state_trackers/va/image.c +++ b/src/gallium/state_trackers/va/image.c @@ -471,7 +471,9 @@ vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, VAImageID image, return VA_STATUS_ERROR_OPERATION_FAILED; } - if (format != surf->buffer->buffer_format) { + if ((format != surf->buffer->buffer_format) && + ((format != PIPE_FORMAT_YV12) || (surf->buffer->buffer_format != PIPE_FORMAT_NV12)) && + ((format != PIPE_FORMAT_IYUV) || (surf->buffer->buffer_format != PIPE_FORMAT_NV12))) { struct pipe_video_buffer *tmp_buf; struct pipe_video_buffer templat = surf->templat; @@ -513,12 +515,29 @@ vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, VAImageID image, unsigned width, height; if (!views[i]) continue; vlVaVideoSurfaceSize(surf, i, &width, &height); - for (j = 0; j < views[i]->texture->array_size; ++j) { - struct pipe_box dst_box = {0, 0, j, width, height, 1}; - drv->pipe->transfer_inline_write(drv->pipe, views[i]->texture, 0, -PIPE_TRANSFER_WRITE, &dst_box, -data[i] + pitches[i] * j, -pitches[i] * views[i]->texture->array_size, 0); + if ((format == PIPE_FORMAT_YV12) || (format == PIPE_FORMAT_IYUV) && +(surf->buffer->buffer_format == PIPE_FORMAT_NV12)) { + struct pipe_transfer *transfer = NULL; + uint8_t *map = NULL; + struct pipe_box dst_box_1 = {0, 0, 0, width, height, 1}; + map = drv->pipe->transfer_map(drv->pipe, + views[i]->texture, + 0, + PIPE_TRANSFER_DISCARD_RANGE, + &dst_box_1, &transfer); + if (map == NULL) 
+return VA_STATUS_ERROR_OPERATION_FAILED; + + u_copy_yv12_img_to_nv12_surf (data, map, vaimage->offsets, i); + pipe_transfer_unmap(drv->pipe, transfer); + } else { + for (j = 0; j < views[i]->texture->array_size; ++j) { +struct pipe_box dst_box = {0, 0, j, width, height, 1}; +drv->pipe->transfer_inline_write(drv->pipe, views[i]->texture, 0, + PIPE_TRANSFER_WRITE, &dst_box, + data[i] + pitches[i] * j, + pitches[i] * views[i]->texture->array_size, 0); + } } } pipe_mutex_unlock(drv->mutex); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/12] st/va: add encode entrypoint
VAAPI passes PIPE_VIDEO_ENTRYPOINT_ENCODE as entry point for encoding case. We will save this encode entry point in config. config_id was used as profile previously. Now, config has both profile and entrypoint field, and config_id is used to get the config object. Later on, we pass this entrypoint to context->templat.entrypoint instead of always hardcoded to PIPE_VIDEO_ENTRYPOINT_BITSTREAM for decoding case previously. Signed-off-by: Boyuan Zhang --- src/gallium/state_trackers/va/config.c | 69 +++--- src/gallium/state_trackers/va/context.c| 59 ++--- src/gallium/state_trackers/va/surface.c| 14 -- src/gallium/state_trackers/va/va_private.h | 5 +++ 4 files changed, 115 insertions(+), 32 deletions(-) diff --git a/src/gallium/state_trackers/va/config.c b/src/gallium/state_trackers/va/config.c index 9ca0aa8..7ea7e24 100644 --- a/src/gallium/state_trackers/va/config.c +++ b/src/gallium/state_trackers/va/config.c @@ -34,6 +34,8 @@ #include "va_private.h" +#include "util/u_handle_table.h" + DEBUG_GET_ONCE_BOOL_OPTION(mpeg4, "VAAPI_MPEG4_ENABLED", false) VAStatus @@ -128,14 +130,29 @@ VAStatus vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id) { + vlVaDriver *drv; + vlVaConfig *config; struct pipe_screen *pscreen; enum pipe_video_profile p; if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; + drv = VL_VA_DRIVER(ctx); + + if (!drv) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + config = CALLOC(1, sizeof(vlVaConfig)); + if (!config) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + if (profile == VAProfileNone && entrypoint == VAEntrypointVideoProc) { - *config_id = PIPE_VIDEO_PROFILE_UNKNOWN; + config->entrypoint = VAEntrypointVideoProc; + config->profile = PIPE_VIDEO_PROFILE_UNKNOWN; + pipe_mutex_lock(drv->mutex); + *config_id = handle_table_add(drv->htab, config); + pipe_mutex_unlock(drv->mutex); return VA_STATUS_SUCCESS; } @@ -150,7 +167,16 @@ vlVaCreateConfig(VADriverContextP 
ctx, VAProfile profile, VAEntrypoint entrypoin if (entrypoint != VAEntrypointVLD) return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - *config_id = p; + if (entrypoint == VAEntrypointEncSlice || entrypoint == VAEntrypointEncPicture) + config->entrypoint = PIPE_VIDEO_ENTRYPOINT_ENCODE; + else + config->entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM; + + config->profile = p; + + pipe_mutex_lock(drv->mutex); + *config_id = handle_table_add(drv->htab, config); + pipe_mutex_unlock(drv->mutex); return VA_STATUS_SUCCESS; } @@ -158,9 +184,27 @@ vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoin VAStatus vlVaDestroyConfig(VADriverContextP ctx, VAConfigID config_id) { + vlVaDriver *drv; + vlVaConfig *config; + if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; + drv = VL_VA_DRIVER(ctx); + + if (!drv) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + pipe_mutex_lock(drv->mutex); + config = handle_table_get(drv->htab, config_id); + + if (!config) + return VA_STATUS_ERROR_INVALID_CONFIG; + + FREE(config); + handle_table_remove(drv->htab, config_id); + pipe_mutex_unlock(drv->mutex); + return VA_STATUS_SUCCESS; } @@ -168,18 +212,33 @@ VAStatus vlVaQueryConfigAttributes(VADriverContextP ctx, VAConfigID config_id, VAProfile *profile, VAEntrypoint *entrypoint, VAConfigAttrib *attrib_list, int *num_attribs) { + vlVaDriver *drv; + vlVaConfig *config; + if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; - *profile = PipeToProfile(config_id); + drv = VL_VA_DRIVER(ctx); + + if (!drv) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + pipe_mutex_lock(drv->mutex); + config = handle_table_get(drv->htab, config_id); + pipe_mutex_unlock(drv->mutex); + + if (!config) + return VA_STATUS_ERROR_INVALID_CONFIG; + + *profile = PipeToProfile(config->profile); - if (config_id == PIPE_VIDEO_PROFILE_UNKNOWN) { + if (config->profile == PIPE_VIDEO_PROFILE_UNKNOWN) { *entrypoint = VAEntrypointVideoProc; *num_attribs = 0; return VA_STATUS_SUCCESS; } - *entrypoint = VAEntrypointVLD; + 
*entrypoint = config->entrypoint; *num_attribs = 1; attrib_list[0].type = VAConfigAttribRTFormat; diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c index 402fbb2..8882cba 100644 --- a/src/gallium/state_trackers/va/context.c +++ b/src/gallium/state_trackers/va/context.c @@ -195,18 +195,23 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID config_id, int picture_width, { vlVaDriver *drv; vlVaContext *context; + vlVaConfig *config; int is_vpp; if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; - is_vpp = confi
[Mesa-dev] [PATCH 02/12] vl: add entry point
Add an entrypoint to distinguish H.264 decode from encode. For example, in patch 5/11, when calling "VaCreateContext", "pps" and "sps" shouldn't be allocated for H.264 encoding, so we need to use the entry_point to determine whether this is H.264 decode or H.264 encode. We can use the config to determine the entrypoint, since config_id is passed to us in the VaCreateContext call. However, for the VaDestroyContext call, only context_id is passed to us, so we need to know the entrypoint in order not to free the pps/sps in the encoding case. Signed-off-by: Boyuan Zhang --- src/gallium/include/pipe/p_video_state.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 754d013..39b3905 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -131,6 +131,7 @@ enum pipe_h264_enc_rate_control_method struct pipe_picture_desc { enum pipe_video_profile profile; + enum pipe_video_entrypoint entry_point; }; struct pipe_quant_matrix -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96979] Mesa 10.5.7 implementation error: Trying to disable permanently enabled extensions
https://bugs.freedesktop.org/show_bug.cgi?id=96979 --- Comment #2 from Kenneth Graunke --- This isn't something we want to support. We should probably use fprintf rather than _mesa_problem so it doesn't print "Mesa: implementation error" as it isn't an implementation issue - it's a warning to the user that the debug options they requested won't take effect because it's unsupported. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH RFC 2/8] nvc0: bind images for 3d/cp shaders on GM107+
On 07/18/2016 11:13 PM, Ilia Mirkin wrote: On Mon, Jul 18, 2016 at 4:55 PM, Samuel Pitoiset wrote: On Maxwell, images binding is slightly different (and much better) regarding Fermi and Kepler because a texture view needs to be uploaded for each image and this is going to simplify the thing a lot. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 5 +- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 4 + src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 10 ++- src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 110 ++-- src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 91 ++-- 5 files changed, 202 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 1137e6c..4bd240b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -161,8 +161,11 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0) for (i = 0; i < NVC0_MAX_BUFFERS; ++i) pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL); - for (i = 0; i < NVC0_MAX_IMAGES; ++i) + for (i = 0; i < NVC0_MAX_IMAGES; ++i) { pipe_resource_reference(&nvc0->images[s][i].resource, NULL); + if (nvc0->screen->base.class_3d >= GM107_3D_CLASS) +pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL); + } } for (s = 0; s < 2; ++s) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 4b73ec3..1d9fca1 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -246,6 +246,7 @@ struct nvc0_context { uint32_t buffers_valid[6]; struct pipe_image_view images[6][NVC0_MAX_IMAGES]; + struct pipe_sampler_view *images_tic[6][NVC0_MAX_IMAGES]; /* GM107+ */ uint16_t images_dirty[6]; uint16_t images_valid[6]; @@ -349,6 +350,9 @@ struct pipe_sampler_view * nvc0_create_sampler_view(struct pipe_context *, struct pipe_resource *, 
const struct pipe_sampler_view *); +struct pipe_sampler_view * +gm107_create_texture_view_from_image(struct pipe_context *, + struct pipe_image_view *); /* nvc0_transfer.c */ void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 441cfc9..98becf4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -1303,8 +1303,16 @@ nvc0_bind_images_range(struct nvc0_context *nvc0, const unsigned s, mask = ((1 << nr) - 1) << start; if (!(nvc0->images_valid[s] & mask)) return false; - for (i = start; i < end; ++i) + for (i = start; i < end; ++i) { pipe_resource_reference(&nvc0->images[s][i].resource, NULL); + if (nvc0->screen->base.class_3d >= GM107_3D_CLASS) { +struct nv50_tic_entry *old = nv50_tic_entry(nvc0->images_tic[s][i]); +if (old) { + nvc0_screen_tic_unlock(nvc0->screen, old); + pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL); +} + } + } nvc0->images_valid[s] &= ~mask; } nvc0->images_dirty[s] |= mask; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 5f7bba8..efbaacf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -236,6 +236,38 @@ gm107_create_texture_view(struct pipe_context *pipe, return &view->pipe; } +struct pipe_sampler_view * +gm107_create_texture_view_from_image(struct pipe_context *pipe, + struct pipe_image_view *view) +{ + struct nv04_resource *res = nv04_resource(view->resource); + enum pipe_texture_target target = res->base.target; + struct pipe_sampler_view templ = {}; + uint32_t flags = 0; + + if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) + target = PIPE_TEXTURE_2D_ARRAY; + + templ.format = view->format; + templ.swizzle_r = PIPE_SWIZZLE_X; + templ.swizzle_g = PIPE_SWIZZLE_Y; + templ.swizzle_b = PIPE_SWIZZLE_Z; + templ.swizzle_a = PIPE_SWIZZLE_W; + + if (target == PIPE_BUFFER) { + 
templ.u.buf.first_element = view->u.buf.first_element; + templ.u.buf.last_element = view->u.buf.last_element; + } else { + templ.u.tex.first_layer = view->u.tex.first_layer; + templ.u.tex.last_layer = view->u.tex.last_layer; + templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level; + } + + flags = res->base.last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS; I think you just want 0 here, always. I thought too, but this will hit the assert at nvc0_tex.c:134 for image buffers. And this is loose
Re: [Mesa-dev] [PATCH] virgl: add exported dmabuf to BO hash table
On Fri, Jun 17, 2016 at 5:25 PM, Rob Herring wrote: > Exported dmabufs can get imported by the same process, but the handle was > not getting added to the hash table on export. Add the handle to the hash > table on export. Ping. Dave, can you please apply. Rob > > Cc: Dave Airlie > Signed-off-by: Rob Herring > --- > src/gallium/winsys/virgl/drm/virgl_drm_winsys.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c > b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c > index cbd416c..8336a33 100644 > --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c > +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c > @@ -486,6 +486,9 @@ static boolean > virgl_drm_winsys_resource_get_handle(struct virgl_winsys *qws, > } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) { >if (drmPrimeHandleToFD(qdws->fd, res->bo_handle, DRM_CLOEXEC, > (int*)&whandle->handle)) > return FALSE; > + pipe_mutex_lock(qdws->bo_handles_mutex); > + util_hash_table_set(qdws->bo_handles, (void > *)(uintptr_t)res->bo_handle, res); > + pipe_mutex_unlock(qdws->bo_handles_mutex); > } > whandle->stride = stride; > return TRUE; > -- > 2.7.4 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nvc0: add support for BGRA8 images
On 07/18/2016 11:16 PM, Ilia Mirkin wrote: Karol tested it on Kepler1, so we should be good. Reviewed-by: Samuel Pitoiset On Mon, Jul 18, 2016 at 5:15 PM, Samuel Pitoiset wrote: Works fine on Fermi, but still need to test on Kepler1. On 07/16/2016 09:09 PM, Ilia Mirkin wrote: This is useful for pbo downloads, which are now accelerated with images. BGRA8 is a moderately common format to do that in. Signed-off-by: Ilia Mirkin --- This needs testing on SM20 and SM30. I've tested it on SM35 and bin/pbo-readpixels-small -auto worked fine. (Didn't until I properly fixed the various items.) src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 2 ++ src/gallium/drivers/nouveau/codegen/nv50_ir.h | 3 +++ src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 ++ src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 4 src/gallium/drivers/nouveau/nv50/g80_defs.xml.h | 1 + src/gallium/drivers/nouveau/nv50/nv50_formats.c | 3 ++- src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 2 ++ 7 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 2caebe8..179ad0b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1012,6 +1012,8 @@ const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] = { "RG8_SNORM",2, { 8, 8, 0, 0 }, SNORM }, { "R16_SNORM",1, { 16, 0, 0, 0 }, SNORM }, { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM }, + + { "BGRA8",4, { 8, 8, 8, 8 }, UNORM, true }, }; void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 41804b6..6d2ee8b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -412,6 +412,8 @@ enum ImgFormat FMT_R16_SNORM, FMT_R8_SNORM, + FMT_BGRA8, + IMG_FORMAT_COUNT, }; @@ -967,6 +969,7 @@ public: uint8_t components; uint8_t bits[4]; ImgType type; + bool 
bgra; }; static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT]; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 61eb7f5..7dff08a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -508,6 +508,8 @@ static nv50_ir::ImgFormat translateImgFormat(uint format) FMT_CASE(R8G8_SNORM, RG8_SNORM); FMT_CASE(R16_SNORM, R16_SNORM); FMT_CASE(R8_SNORM, R8_SNORM); + + FMT_CASE(B8G8R8A8_UNORM, BGRA8); } assert(!"Unexpected format"); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 18955eb..92bc0bb 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1940,6 +1940,10 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16, typedDst[i]); } } + + if (format->bgra) { + std::swap(typedDst[0], typedDst[2]); + } } void diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h index 5d40624..49bf860 100644 --- a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h +++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h @@ -177,6 +177,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define GK104_IMAGE_FORMAT_RG32_FLOAT 0x000d #define GK104_IMAGE_FORMAT_RG32_SINT 0x000e #define GK104_IMAGE_FORMAT_RG32_UINT 0x000f +#define GK104_IMAGE_FORMAT_BGRA8_UNORM 0x0011 #define GK104_IMAGE_FORMAT_RGB10_A2_UNORM 0x0013 #define GK104_IMAGE_FORMAT_RGB10_A2_UINT 0x0015 #define GK104_IMAGE_FORMAT_RGBA8_UNORM 0x0018 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c index 34d32d1..07c4419 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c @@ -50,6 +50,7 @@ #define U_IB PIPE_BIND_BLENDABLE | U_IR #define U_TD PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB #define U_TZ PIPE_BIND_DEPTH_STENCIL | U_T +#define U_ID U_TD | U_I #if NOUVEAU_DRIVER == 0xc0 # define U_TC U_TB # define U_IC U_IB @@ -122,7 +123,7 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = #endif { - C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM
Re: [Mesa-dev] [PATCH] nvc0: add support for BGRA8 images
Karol tested it on Kepler1, so we should be good. On Mon, Jul 18, 2016 at 5:15 PM, Samuel Pitoiset wrote: > Works fine on Fermi, but still need to test on Kepler1. > > > On 07/16/2016 09:09 PM, Ilia Mirkin wrote: >> >> This is useful for pbo downloads, which are now accelerated with images. >> BGRA8 is a moderately common format to do that in. >> >> Signed-off-by: Ilia Mirkin >> --- >> >> This needs testing on SM20 and SM30. I've tested it on SM35 and >> >> bin/pbo-readpixels-small -auto >> >> worked fine. (Didn't until I properly fixed the various items.) >> >> src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 2 ++ >> src/gallium/drivers/nouveau/codegen/nv50_ir.h | 3 +++ >> src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 ++ >> src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 4 >> src/gallium/drivers/nouveau/nv50/g80_defs.xml.h | 1 + >> src/gallium/drivers/nouveau/nv50/nv50_formats.c | 3 ++- >> src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 2 ++ >> 7 files changed, 16 insertions(+), 1 deletion(-) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp >> index 2caebe8..179ad0b 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp >> @@ -1012,6 +1012,8 @@ const struct TexInstruction::ImgFormatDesc >> TexInstruction::formatTable[] = >> { "RG8_SNORM",2, { 8, 8, 0, 0 }, SNORM }, >> { "R16_SNORM",1, { 16, 0, 0, 0 }, SNORM }, >> { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM }, >> + >> + { "BGRA8",4, { 8, 8, 8, 8 }, UNORM, true }, >> }; >> >> void >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h >> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >> index 41804b6..6d2ee8b 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >> @@ -412,6 +412,8 @@ enum ImgFormat >> FMT_R16_SNORM, >> FMT_R8_SNORM, >> >> + FMT_BGRA8, >> + >> IMG_FORMAT_COUNT, >> }; >> 
>> @@ -967,6 +969,7 @@ public: >>uint8_t components; >>uint8_t bits[4]; >>ImgType type; >> + bool bgra; >> }; >> >> static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT]; >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> index 61eb7f5..7dff08a 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> @@ -508,6 +508,8 @@ static nv50_ir::ImgFormat translateImgFormat(uint >> format) >> FMT_CASE(R8G8_SNORM, RG8_SNORM); >> FMT_CASE(R16_SNORM, R16_SNORM); >> FMT_CASE(R8_SNORM, R8_SNORM); >> + >> + FMT_CASE(B8G8R8A8_UNORM, BGRA8); >> } >> >> assert(!"Unexpected format"); >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> index 18955eb..92bc0bb 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> @@ -1940,6 +1940,10 @@ >> NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) >> bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16, typedDst[i]); >>} >> } >> + >> + if (format->bgra) { >> + std::swap(typedDst[0], typedDst[2]); >> + } >> } >> >> void >> diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h >> b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h >> index 5d40624..49bf860 100644 >> --- a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h >> +++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h >> @@ -177,6 +177,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE >> SOFTWARE. 
>> #define GK104_IMAGE_FORMAT_RG32_FLOAT 0x000d >> #define GK104_IMAGE_FORMAT_RG32_SINT 0x000e >> #define GK104_IMAGE_FORMAT_RG32_UINT 0x000f >> +#define GK104_IMAGE_FORMAT_BGRA8_UNORM 0x0011 >> #define GK104_IMAGE_FORMAT_RGB10_A2_UNORM 0x0013 >> #define GK104_IMAGE_FORMAT_RGB10_A2_UINT 0x0015 >> #define GK104_IMAGE_FORMAT_RGBA8_UNORM 0x0018 >> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c >> b/src/gallium/drivers/nouveau/nv50/nv50_formats.c >> index 34d32d1..07c4419 100644 >> --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c >> +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c >> @@ -50,6 +50,7 @@ >> #define U_IB PIPE_BIND_BLENDABLE | U_IR >> #define U_TD PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB >> #define U_TZ PIPE_BIND_DEPTH_STENCIL | U_T >> +#define U_ID U_TD | U_I >> #if NOUVEAU_DRIVER == 0xc0
Re: [Mesa-dev] [PATCH] nvc0: add support for BGRA8 images
Works fine on Fermi, but still need to test on Kepler1. On 07/16/2016 09:09 PM, Ilia Mirkin wrote: This is useful for pbo downloads, which are now accelerated with images. BGRA8 is a moderately common format to do that in. Signed-off-by: Ilia Mirkin --- This needs testing on SM20 and SM30. I've tested it on SM35 and bin/pbo-readpixels-small -auto worked fine. (Didn't until I properly fixed the various items.) src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 2 ++ src/gallium/drivers/nouveau/codegen/nv50_ir.h | 3 +++ src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 ++ src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 4 src/gallium/drivers/nouveau/nv50/g80_defs.xml.h | 1 + src/gallium/drivers/nouveau/nv50/nv50_formats.c | 3 ++- src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 2 ++ 7 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 2caebe8..179ad0b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1012,6 +1012,8 @@ const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] = { "RG8_SNORM",2, { 8, 8, 0, 0 }, SNORM }, { "R16_SNORM",1, { 16, 0, 0, 0 }, SNORM }, { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM }, + + { "BGRA8",4, { 8, 8, 8, 8 }, UNORM, true }, }; void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 41804b6..6d2ee8b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -412,6 +412,8 @@ enum ImgFormat FMT_R16_SNORM, FMT_R8_SNORM, + FMT_BGRA8, + IMG_FORMAT_COUNT, }; @@ -967,6 +969,7 @@ public: uint8_t components; uint8_t bits[4]; ImgType type; + bool bgra; }; static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT]; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 61eb7f5..7dff08a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -508,6 +508,8 @@ static nv50_ir::ImgFormat translateImgFormat(uint format) FMT_CASE(R8G8_SNORM, RG8_SNORM); FMT_CASE(R16_SNORM, R16_SNORM); FMT_CASE(R8_SNORM, R8_SNORM); + + FMT_CASE(B8G8R8A8_UNORM, BGRA8); } assert(!"Unexpected format"); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 18955eb..92bc0bb 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1940,6 +1940,10 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16, typedDst[i]); } } + + if (format->bgra) { + std::swap(typedDst[0], typedDst[2]); + } } void diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h index 5d40624..49bf860 100644 --- a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h +++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h @@ -177,6 +177,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define GK104_IMAGE_FORMAT_RG32_FLOAT 0x000d #define GK104_IMAGE_FORMAT_RG32_SINT 0x000e #define GK104_IMAGE_FORMAT_RG32_UINT 0x000f +#define GK104_IMAGE_FORMAT_BGRA8_UNORM 0x0011 #define GK104_IMAGE_FORMAT_RGB10_A2_UNORM 0x0013 #define GK104_IMAGE_FORMAT_RGB10_A2_UINT 0x0015 #define GK104_IMAGE_FORMAT_RGBA8_UNORM 0x0018 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c index 34d32d1..07c4419 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c @@ -50,6 +50,7 @@ #define U_IB PIPE_BIND_BLENDABLE | U_IR #define U_TD PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB #define U_TZ PIPE_BIND_DEPTH_STENCIL | U_T +#define U_ID U_TD | U_I #if NOUVEAU_DRIVER == 0xc0 # define U_TC U_TB # define U_IC U_IB @@ -122,7 +123,7 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = #endif { - C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM, A8B8G8R8, TD), + C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM, A8B8G8R8, ID), F3(A, B8G8R8X8_UNORM, BGRX8_UNORM, B, G, R, xx, UNORM, A8B8G8R8, TD), C4(A, B8G8R8A
Re: [Mesa-dev] [PATCH RFC 2/8] nvc0: bind images for 3d/cp shaders on GM107+
On Mon, Jul 18, 2016 at 4:55 PM, Samuel Pitoiset wrote: > On Maxwell, images binding is slightly different (and much better) > regarding Fermi and Kepler because a texture view needs to be uploaded > for each image and this is going to simplify the thing a lot. > > Signed-off-by: Samuel Pitoiset > --- > src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 5 +- > src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 4 + > src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 10 ++- > src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 110 > ++-- > src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 91 ++-- > 5 files changed, 202 insertions(+), 18 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c > index 1137e6c..4bd240b 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c > @@ -161,8 +161,11 @@ nvc0_context_unreference_resources(struct nvc0_context > *nvc0) >for (i = 0; i < NVC0_MAX_BUFFERS; ++i) > pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL); > > - for (i = 0; i < NVC0_MAX_IMAGES; ++i) > + for (i = 0; i < NVC0_MAX_IMAGES; ++i) { > pipe_resource_reference(&nvc0->images[s][i].resource, NULL); > + if (nvc0->screen->base.class_3d >= GM107_3D_CLASS) > +pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL); > + } > } > > for (s = 0; s < 2; ++s) { > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > index 4b73ec3..1d9fca1 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > @@ -246,6 +246,7 @@ struct nvc0_context { > uint32_t buffers_valid[6]; > > struct pipe_image_view images[6][NVC0_MAX_IMAGES]; > + struct pipe_sampler_view *images_tic[6][NVC0_MAX_IMAGES]; /* GM107+ */ > uint16_t images_dirty[6]; > uint16_t images_valid[6]; > > @@ -349,6 +350,9 @@ struct pipe_sampler_view * > 
nvc0_create_sampler_view(struct pipe_context *, > struct pipe_resource *, > const struct pipe_sampler_view *); > +struct pipe_sampler_view * > +gm107_create_texture_view_from_image(struct pipe_context *, > + struct pipe_image_view *); > > /* nvc0_transfer.c */ > void > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > index 441cfc9..98becf4 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > @@ -1303,8 +1303,16 @@ nvc0_bind_images_range(struct nvc0_context *nvc0, > const unsigned s, >mask = ((1 << nr) - 1) << start; >if (!(nvc0->images_valid[s] & mask)) > return false; > - for (i = start; i < end; ++i) > + for (i = start; i < end; ++i) { > pipe_resource_reference(&nvc0->images[s][i].resource, NULL); > + if (nvc0->screen->base.class_3d >= GM107_3D_CLASS) { > +struct nv50_tic_entry *old = > nv50_tic_entry(nvc0->images_tic[s][i]); > +if (old) { > + nvc0_screen_tic_unlock(nvc0->screen, old); > + pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL); > +} > + } > + } >nvc0->images_valid[s] &= ~mask; > } > nvc0->images_dirty[s] |= mask; > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > index 5f7bba8..efbaacf 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > @@ -236,6 +236,38 @@ gm107_create_texture_view(struct pipe_context *pipe, > return &view->pipe; > } > > +struct pipe_sampler_view * > +gm107_create_texture_view_from_image(struct pipe_context *pipe, > + struct pipe_image_view *view) > +{ > + struct nv04_resource *res = nv04_resource(view->resource); > + enum pipe_texture_target target = res->base.target; > + struct pipe_sampler_view templ = {}; > + uint32_t flags = 0; > + > + if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) > + target = PIPE_TEXTURE_2D_ARRAY; > + > + templ.format = view->format; > + 
templ.swizzle_r = PIPE_SWIZZLE_X; > + templ.swizzle_g = PIPE_SWIZZLE_Y; > + templ.swizzle_b = PIPE_SWIZZLE_Z; > + templ.swizzle_a = PIPE_SWIZZLE_W; > + > + if (target == PIPE_BUFFER) { > + templ.u.buf.first_element = view->u.buf.first_element; > + templ.u.buf.last_element = view->u.buf.last_element; > + } else { > + templ.u.tex.first_layer = view->u.tex.first_layer; > + templ.u.tex.last_layer = view->u.tex.last_layer; > + templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level; > + } > + > + flags = res->base.last_
Re: [Mesa-dev] [PATCH v3] glsl: reuse main extension table to appropriately restrict extensions
Well, I have a basic review on this from Eric Engestrom, who is not a mesa expert (yet?) but has been giving out a lot of good review comments lately, and nobody else has piped up saying they hate this, so I'm going to push this in the next few days unless I hear any objections. IMHO this is a nice simplification of the glsl parser boilerplate, and removes the oft-forgotten glcpp annoyance. On Tue, Jul 12, 2016 at 11:07 AM, Ilia Mirkin wrote: > ping^2 > > On Tue, Jul 5, 2016 at 6:41 PM, Ilia Mirkin wrote: >> ping >> >> On Fri, Jun 24, 2016 at 1:42 AM, Ilia Mirkin wrote: >>> Previously we were only restricting based on ES/non-ES-ness and whether >>> the overall enable bit had been flipped on. However we have been adding >>> more fine-grained restrictions, such as based on compat profiles, as >>> well as specific ES versions. Most of the time this doesn't matter, but >>> it can create awkward situations and duplication of logic. >>> >>> Here we separate the main extension table into a separate object file, >>> linked to the glsl compiler, which makes use of it with a custom >>> function which takes the ES-ness of the shader into account (thus >>> allowing desktop shaders to properly use ES extensions that would >>> otherwise have been disallowed.) >>> >>> The effect of this change should be nil in most cases. However in some >>> situations, extensions like GL_ARB_gpu_shader5 which were formerly >>> available in compat contexts on the GLSL side of things will now become >>> inaccessible. >>> >>> Signed-off-by: Ilia Mirkin >>> Reviewed-by: Eric Engestrom (v2) >>> v2 -> v3: integrate glcpp defines into the same mechanism >>> --- >>> >>> FWIW I hate the method I had to invent to get this information to >>> glcpp. A callback that takes a callback. Ugh. Sorry. If someone can >>> come up with something cleaner, I'm all ears. >>> >>> This does appear to pass some basic testing. 
>>> >>> src/Makefile.am | 1 + >>> src/compiler/SConscript.glsl | 2 + >>> src/compiler/glsl/glcpp/glcpp-parse.y| 204 +- >>> src/compiler/glsl/glcpp/glcpp.c | 2 +- >>> src/compiler/glsl/glcpp/glcpp.h | 19 ++- >>> src/compiler/glsl/glcpp/pp.c | 6 +- >>> src/compiler/glsl/glsl_parser_extras.cpp | 283 >>> +-- >>> src/compiler/glsl/glsl_parser_extras.h | 17 +- >>> src/compiler/glsl/test_optpass.cpp | 2 +- >>> src/mesa/Android.libmesa_glsl_utils.mk | 2 + >>> src/mesa/Makefile.sources| 1 + >>> src/mesa/main/extensions.c | 33 +--- >>> src/mesa/main/extensions.h | 1 + >>> src/mesa/main/extensions_table.c | 51 ++ >>> 14 files changed, 269 insertions(+), 355 deletions(-) >>> create mode 100644 src/mesa/main/extensions_table.c >>> >>> diff --git a/src/Makefile.am b/src/Makefile.am >>> index 32372da..d38f7c4 100644 >>> --- a/src/Makefile.am >>> +++ b/src/Makefile.am >>> @@ -114,6 +114,7 @@ AM_CPPFLAGS = \ >>> noinst_LTLIBRARIES = libglsl_util.la >>> >>> libglsl_util_la_SOURCES = \ >>> + mesa/main/extensions_table.c \ >>> mesa/main/imports.c \ >>> mesa/program/prog_hash_table.c \ >>> mesa/program/symbol_table.c \ >>> diff --git a/src/compiler/SConscript.glsl b/src/compiler/SConscript.glsl >>> index 4252ce1..31d8f6d 100644 >>> --- a/src/compiler/SConscript.glsl >>> +++ b/src/compiler/SConscript.glsl >>> @@ -70,6 +70,7 @@ if env['msvc']: >>> # Copy these files to avoid generation object files into src/mesa/program >>> env.Prepend(CPPPATH = ['#src/mesa/main']) >>> env.Command('glsl/imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', >>> '$SOURCE')) >>> +env.Command('glsl/extensions_table.c', >>> '#src/mesa/main/extensions_table.c', Copy('$TARGET', '$SOURCE')) >>> # Copy these files to avoid generation object files into src/mesa/program >>> env.Prepend(CPPPATH = ['#src/mesa/program']) >>> env.Command('glsl/prog_hash_table.c', >>> '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE')) >>> @@ -79,6 +80,7 @@ env.Command('glsl/dummy_errors.c', >>> 
'#src/mesa/program/dummy_errors.c', Copy('$TA >>> compiler_objs = env.StaticObject(source_lists['GLSL_COMPILER_CXX_FILES']) >>> >>> mesa_objs = env.StaticObject([ >>> +'glsl/extensions_table.c', >>> 'glsl/imports.c', >>> 'glsl/prog_hash_table.c', >>> 'glsl/symbol_table.c', >>> diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y >>> b/src/compiler/glsl/glcpp/glcpp-parse.y >>> index b9d690d..ca376d9 100644 >>> --- a/src/compiler/glsl/glcpp/glcpp-parse.y >>> +++ b/src/compiler/glsl/glcpp/glcpp-parse.y >>> @@ -1311,7 +1311,7 @@ add_builtin_define(glcpp_parser_t *parser, const char >>> *name, int value) >>> } >>> >>> glcpp_parser_t * >>> -glcpp_parser_create(const struct gl_extensions *extensions, gl_api api) >>> +glcpp_parser_create(glcpp_extension_iterator extensions, void *state, >>> gl_api api) >>> { >>> glcpp_pa
Re: [Mesa-dev] [PATCH] anv: Drop unnecessary is_haswell check in state base address code.
On Mon, Jul 18, 2016 at 1:41 PM, Kenneth Graunke wrote: > Both branches are identical. Fixes a Coverity warning. > > CID: 1364155 > > Signed-off-by: Kenneth Graunke > --- > src/intel/vulkan/anv_cmd_buffer.c | 5 + > 1 file changed, 1 insertion(+), 4 deletions(-) > > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > b/src/intel/vulkan/anv_cmd_buffer.c > index 6256df8..ce4266f 100644 > --- a/src/intel/vulkan/anv_cmd_buffer.c > +++ b/src/intel/vulkan/anv_cmd_buffer.c > @@ -358,10 +358,7 @@ anv_cmd_buffer_emit_state_base_address(struct > anv_cmd_buffer *cmd_buffer) > { > switch (cmd_buffer->device->info.gen) { > case 7: > - if (cmd_buffer->device->info.is_haswell) > - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); > - else > - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); > + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); > case 8: >return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); > case 9: > -- > 2.9.0 This was added in commit 6f613abc2b, which looks really sketchy. The patch adds a prototype for gen75_cmd_buffer_emit_state_base_address(), which I assume was intended to be used in this if-statement, and the commit summary says "Incidentally, this should fix MOCS settings for dynamic and surface state on Haswell." However, other than the useless is_haswell check that your patch removes, that commit didn't touch anything related to Haswell. Jason should really take a look. Clearly /something/ is wrong. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 6/8] gm107/ir: add emission for SUREDx
Signed-off-by: Samuel Pitoiset --- .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 50 ++ 1 file changed, 50 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 871ffd2..9818c4d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -205,6 +205,7 @@ private: void emitSUHandle(const int s); void emitSUSTx(); void emitSULDx(); + void emitSUREDx(); }; /*** @@ -2913,6 +2914,51 @@ CodeEmitterGM107::emitSULDx() emitSUHandle(1); } + +void +CodeEmitterGM107::emitSUREDx() +{ + const TexInstruction *insn = this->insn->asTex(); + uint8_t type = 0, subOp; + + if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) + emitInsn(0xeac0); + else + emitInsn(0xea60); + + if (insn->op == OP_SUREDB) + emitField(0x34, 1, 1); + emitSUTarget(); + + // destination type + switch (insn->dType) { + case TYPE_S32: type = 1; break; + case TYPE_U64: type = 2; break; + case TYPE_F32: type = 3; break; + case TYPE_S64: type = 5; break; + default: + assert(insn->dType == TYPE_U32); + break; + } + + // atomic operation + if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { + subOp = 0; + } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) { + subOp = 8; + } else { + subOp = insn->subOp; + } + + emitField(0x24, 3, type); + emitField(0x1d, 4, subOp); + emitGPR (0x14, insn->src(1)); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); + + emitSUHandle(2); +} + /*** * assembler front-end **/ @@ -3235,6 +3281,10 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case OP_SULDP: emitSULDx(); break; + case OP_SUREDB: + case OP_SUREDP: + emitSUREDx(); + break; default: assert(!"invalid opcode"); emitNOP(); -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 7/8] nv50/ir: print OP_SUREDB subops in debug mode
Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index ae0dd78..22f2f5d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -570,6 +570,7 @@ void Instruction::print() const PRINT("%s ", interpStr[ipa]); switch (op) { case OP_SUREDP: + case OP_SUREDB: case OP_ATOM: if (subOp < ARRAY_SIZE(atomSubOpStr)) PRINT("%s ", atomSubOpStr[subOp]); -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 8/8] nvc0: disable MS images on GM107+
MS images have to be handled explicitly and I don't plan to implement them for now. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index f681631..a0d3495 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -90,6 +90,13 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, PIPE_BIND_LINEAR | PIPE_BIND_SHARED); + if (bindings & PIPE_BIND_SHADER_IMAGE && + nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) { + /* MS images are currently unsupported on Maxwell because they have to + * be handled explicitly. */ + return false; + } + return (( nvc0_format_table[format].usage | nvc0_vertex_format[format].usage) & bindings) == bindings; } -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 2/8] nvc0: bind images for 3d/cp shaders on GM107+
On Maxwell, images binding is slightly different (and much better) regarding Fermi and Kepler because a texture view needs to be uploaded for each image and this is going to simplify the thing a lot. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 5 +- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 4 + src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 10 ++- src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 110 ++-- src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 91 ++-- 5 files changed, 202 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 1137e6c..4bd240b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -161,8 +161,11 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0) for (i = 0; i < NVC0_MAX_BUFFERS; ++i) pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL); - for (i = 0; i < NVC0_MAX_IMAGES; ++i) + for (i = 0; i < NVC0_MAX_IMAGES; ++i) { pipe_resource_reference(&nvc0->images[s][i].resource, NULL); + if (nvc0->screen->base.class_3d >= GM107_3D_CLASS) +pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL); + } } for (s = 0; s < 2; ++s) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 4b73ec3..1d9fca1 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -246,6 +246,7 @@ struct nvc0_context { uint32_t buffers_valid[6]; struct pipe_image_view images[6][NVC0_MAX_IMAGES]; + struct pipe_sampler_view *images_tic[6][NVC0_MAX_IMAGES]; /* GM107+ */ uint16_t images_dirty[6]; uint16_t images_valid[6]; @@ -349,6 +350,9 @@ struct pipe_sampler_view * nvc0_create_sampler_view(struct pipe_context *, struct pipe_resource *, const struct pipe_sampler_view *); +struct pipe_sampler_view * 
+gm107_create_texture_view_from_image(struct pipe_context *, + struct pipe_image_view *); /* nvc0_transfer.c */ void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 441cfc9..98becf4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -1303,8 +1303,16 @@ nvc0_bind_images_range(struct nvc0_context *nvc0, const unsigned s, mask = ((1 << nr) - 1) << start; if (!(nvc0->images_valid[s] & mask)) return false; - for (i = start; i < end; ++i) + for (i = start; i < end; ++i) { pipe_resource_reference(&nvc0->images[s][i].resource, NULL); + if (nvc0->screen->base.class_3d >= GM107_3D_CLASS) { +struct nv50_tic_entry *old = nv50_tic_entry(nvc0->images_tic[s][i]); +if (old) { + nvc0_screen_tic_unlock(nvc0->screen, old); + pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL); +} + } + } nvc0->images_valid[s] &= ~mask; } nvc0->images_dirty[s] |= mask; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 5f7bba8..efbaacf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -236,6 +236,38 @@ gm107_create_texture_view(struct pipe_context *pipe, return &view->pipe; } +struct pipe_sampler_view * +gm107_create_texture_view_from_image(struct pipe_context *pipe, + struct pipe_image_view *view) +{ + struct nv04_resource *res = nv04_resource(view->resource); + enum pipe_texture_target target = res->base.target; + struct pipe_sampler_view templ = {}; + uint32_t flags = 0; + + if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) + target = PIPE_TEXTURE_2D_ARRAY; + + templ.format = view->format; + templ.swizzle_r = PIPE_SWIZZLE_X; + templ.swizzle_g = PIPE_SWIZZLE_Y; + templ.swizzle_b = PIPE_SWIZZLE_Z; + templ.swizzle_a = PIPE_SWIZZLE_W; + + if (target == PIPE_BUFFER) { + templ.u.buf.first_element = view->u.buf.first_element; + 
templ.u.buf.last_element = view->u.buf.last_element; + } else { + templ.u.tex.first_layer = view->u.tex.first_layer; + templ.u.tex.last_layer = view->u.tex.last_layer; + templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level; + } + + flags = res->base.last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS; + + return gm107_create_texture_view(pipe, &res->base, &templ, flags, target); +} + static struct pipe_sampler_view * gf100_create_texture_view(struct pipe_context *pipe, struct pipe_resource *texture, @@ -109
[Mesa-dev] [PATCH RFC 3/8] gm107/ir: lower surface operations
Signed-off-by: Samuel Pitoiset --- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 76 +- .../nouveau/codegen/nv50_ir_lowering_nvc0.h| 2 + 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 18955eb..b7dc624 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2108,6 +2108,78 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su) } } +void +NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) +{ + const int slot = su->tex.r; + const int dim = su->tex.target.getDim(); + const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); + Value *ind = su->getIndirectR(); + int pos = 0; + + bld.setPosition(su, false); + + // add texture handle + switch (su->op) { + case OP_SUSTP: + pos = 4; + break; + case OP_SUREDP: + pos = (su->subOp == NV50_IR_SUBOP_ATOM_CAS) ? 
2 : 1; + break; + default: + assert(pos == 0); + break; + } + su->setSrc(arg + pos, loadTexHandle(ind, slot + 32)); + + // prevent read fault when the image is not actually bound + CmpInstruction *pred = + bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), +TYPE_U32, bld.mkImm(0), +loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); + if (su->op != OP_SUSTP && su->tex.format) { + const TexInstruction::ImgFormatDesc *format = su->tex.format; + int blockwidth = format->bits[0] + format->bits[1] + + format->bits[2] + format->bits[3]; + + assert(format->components != 0); + // make sure that the format doesn't mismatch when it's not FMT_NONE + bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), +TYPE_U32, bld.loadImm(NULL, blockwidth / 8), +loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), +pred->getDef(0)); + } + su->setPredicate(CC_NOT_P, pred->getDef(0)); +} + +void +NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su) +{ + processSurfaceCoordsGM107(su); + + if (su->op == OP_SULDP) + convertSurfaceFormat(su); + + if (su->op == OP_SUREDP) { + Value *def = su->getDef(0); + + su->op = OP_SUREDB; + su->setDef(0, bld.getSSA()); + + bld.setPosition(su, true); + + // make sure to initialize dst value when the atomic operation is not + // performed + Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); + + assert(su->cc == CC_NOT_P); + mov->setPredicate(CC_P, su->getPredicate()); + + bld.mkOp2(OP_UNION, TYPE_U32, def, su->getDef(0), mov->getDef(0)); + } +} + bool NVC0LoweringPass::handleWRSV(Instruction *i) { @@ -2600,7 +2672,9 @@ NVC0LoweringPass::visit(Instruction *i) case OP_SUSTP: case OP_SUREDB: case OP_SUREDP: - if (targ->getChipset() >= NVISA_GK104_CHIPSET) + if (targ->getChipset() >= NVISA_GM107_CHIPSET) + handleSurfaceOpGM107(i->asTex()); + else if (targ->getChipset() >= NVISA_GK104_CHIPSET) handleSurfaceOpNVE4(i->asTex()); else handleSurfaceOpNVC0(i->asTex()); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 4d7d8cc..104bc03 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -106,6 +106,7 @@ protected: bool handleSUQ(TexInstruction *); bool handleATOM(Instruction *); bool handleCasExch(Instruction *, bool needCctl); + void handleSurfaceOpGM107(TexInstruction *); void handleSurfaceOpNVE4(TexInstruction *); void handleSurfaceOpNVC0(TexInstruction *); void handleSharedATOM(Instruction *); @@ -135,6 +136,7 @@ private: Value *loadTexHandle(Value *ptr, unsigned int slot); void adjustCoordinatesMS(TexInstruction *); + void processSurfaceCoordsGM107(TexInstruction *); void processSurfaceCoordsNVE4(TexInstruction *); void processSurfaceCoordsNVC0(TexInstruction *); void convertSurfaceFormat(TexInstruction *); -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 4/8] gm107/ra: fix constraints for surface operations
Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 25 -- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 63fe9c0..2d3486b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -2093,8 +2093,29 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) textureMask(tex); condenseDefs(tex); - if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { - condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1); + if (isSurfaceOp(tex->op)) { + int s = tex->tex.target.getDim() + + (tex->tex.target.isArray() || tex->tex.target.isCube()); + int n = 0; + + switch (tex->op) { + case OP_SUSTB: + case OP_SUSTP: + n = 4; + break; + case OP_SUREDB: + case OP_SUREDP: + if (tex->subOp == NV50_IR_SUBOP_ATOM_CAS) +n = 2; + break; + default: + break; + } + + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 1) + condenseSrcs(tex, 1, n); // do not condense the tex handle } else if (isTextureOp(tex->op)) { if (tex->op != OP_TXQ) { -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 5/8] gm107/ir: add emission for SUSTx and SULDx
Signed-off-by: Samuel Pitoiset --- .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 105 + 1 file changed, 105 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index f1ba27a..871ffd2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -200,6 +200,11 @@ private: void emitMEMBAR(); void emitVOTE(); + + void emitSUTarget(); + void emitSUHandle(const int s); + void emitSUSTx(); + void emitSULDx(); }; /*** @@ -2816,6 +2821,98 @@ CodeEmitterGM107::emitVOTE() emitPRED (0x27, insn->src(0)); } +void +CodeEmitterGM107::emitSUTarget() +{ + const TexInstruction *insn = this->insn->asTex(); + int target = 0; + + assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); + + if (insn->tex.target == TEX_TARGET_BUFFER) { + target = 2; + } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) { + target = 4; + } else if (insn->tex.target == TEX_TARGET_2D || + insn->tex.target == TEX_TARGET_RECT) { + target = 6; + } else if (insn->tex.target == TEX_TARGET_2D_ARRAY || + insn->tex.target == TEX_TARGET_CUBE || + insn->tex.target == TEX_TARGET_CUBE_ARRAY) { + target = 8; + } else if (insn->tex.target == TEX_TARGET_3D || + insn->tex.target == TEX_TARGET_CUBE_ARRAY) { + target = 10; + } else { + assert(insn->tex.target == TEX_TARGET_1D); + } + emitField(0x20, 4, target); +} + +void +CodeEmitterGM107::emitSUHandle(const int s) +{ + const TexInstruction *insn = this->insn->asTex(); + + assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); + + if (insn->src(s).getFile() == FILE_GPR) { + emitGPR(0x27, insn->src(s)); + } else { + ImmediateValue *imm = insn->getSrc(s)->asImm(); + assert(imm); + emitField(0x33, 1, 1); + emitField(0x24, 13, imm->reg.data.u32); + } +} + +void +CodeEmitterGM107::emitSUSTx() +{ + const TexInstruction *insn = this->insn->asTex(); + + emitInsn(0xeb20); + if (insn->op == OP_SUSTB) + 
emitField(0x34, 1, 1); + emitSUTarget(); + + emitLDSTc(0x18); + emitField(0x14, 4, 0xf); // rgba + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->src(1)); + + emitSUHandle(2); +} + +void +CodeEmitterGM107::emitSULDx() +{ + const TexInstruction *insn = this->insn->asTex(); + int type = 0; + + emitInsn(0xeb00); + if (insn->op == OP_SULDB) + emitField(0x34, 1, 1); + emitSUTarget(); + + switch (insn->dType) { + case TYPE_S8: type = 1; break; + case TYPE_U16: type = 2; break; + case TYPE_S16: type = 3; break; + case TYPE_U32: type = 4; break; + case TYPE_U64: type = 5; break; + case TYPE_B128: type = 6; break; + default: + assert(insn->dType == TYPE_U8); + break; + } + emitLDSTc(0x18); + emitField(0x14, 3, type); + emitGPR (0x00, insn->def(0)); + emitGPR (0x08, insn->src(0)); + + emitSUHandle(1); +} /*** * assembler front-end **/ @@ -3130,6 +3227,14 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case OP_VOTE: emitVOTE(); break; + case OP_SUSTB: + case OP_SUSTP: + emitSUSTx(); + break; + case OP_SULDB: + case OP_SULDP: + emitSULDx(); + break; default: assert(!"invalid opcode"); emitNOP(); -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 1/8] nvc0: increase the tex handles area size in the driver cb
Currently, we can store 32 tex handles, each a 32-bit integer, and that fits perfectly with the underlying hardware except on GM107+, which requires uploading a texture view for each image. This patch increases the number of storable texture handles in the driver constant buffer from 32 to 40 because we expose 8 images. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 7acd477..4b73ec3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -108,34 +108,34 @@ /* XXX: Figure out what this UNK data is. */ #define NVC0_CB_AUX_UNK_INFO0x000 #define NVC0_CB_AUX_UNK_SIZE(8 * 4) -/* 32 textures handles, at 1 32-bits integer each */ +/* 40 textures handles (8 for GM107+ images only), at 1 32-bits integer each */ #define NVC0_CB_AUX_TEX_INFO(i) 0x020 + (i) * 4 -#define NVC0_CB_AUX_TEX_SIZE(32 * 4) +#define NVC0_CB_AUX_TEX_SIZE(40 * 4) /* 8 sets of 32-bits coordinate offsets */ -#define NVC0_CB_AUX_MS_INFO 0x0a0 +#define NVC0_CB_AUX_MS_INFO 0x0c0 #define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4) /* block/grid size, at 3 32-bits integers each, gridid and work_dim */ -#define NVC0_CB_AUX_GRID_INFO(i)0x0e0 + (i) * 4 /* CP */ +#define NVC0_CB_AUX_GRID_INFO(i)0x100 + (i) * 4 /* CP */ #define NVC0_CB_AUX_GRID_SIZE (8 * 4) /* 8 user clip planes, at 4 32-bits floats each */ -#define NVC0_CB_AUX_UCP_INFO0x100 +#define NVC0_CB_AUX_UCP_INFO0x120 #define NVC0_CB_AUX_UCP_SIZE(PIPE_MAX_CLIP_PLANES * 4 * 4) /* 13 ubos, at 4 32-bits integer each */ -#define NVC0_CB_AUX_UBO_INFO(i) 0x100 + (i) * 4 * 4 /* CP */ +#define NVC0_CB_AUX_UBO_INFO(i) 0x120 + (i) * 4 * 4 /* CP */ #define NVC0_CB_AUX_UBO_SIZE((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4) /* 8 sets of 32-bits integer pairs sample offsets */ -#define NVC0_CB_AUX_SAMPLE_INFO 0x180 /* 
FP */ +#define NVC0_CB_AUX_SAMPLE_INFO 0x1a0 /* FP */ #define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2) /* draw parameters (index bais, base instance, drawid) */ -#define NVC0_CB_AUX_DRAW_INFO 0x180 /* VP */ +#define NVC0_CB_AUX_DRAW_INFO 0x1a0 /* VP */ /* 32 user buffers, at 4 32-bits integers each */ -#define NVC0_CB_AUX_BUF_INFO(i) 0x200 + (i) * 4 * 4 +#define NVC0_CB_AUX_BUF_INFO(i) 0x220 + (i) * 4 * 4 #define NVC0_CB_AUX_BUF_SIZE(NVC0_MAX_BUFFERS * 4 * 4) /* 8 surfaces, at 16 32-bits integers each */ -#define NVC0_CB_AUX_SU_INFO(i) 0x400 + (i) * 16 * 4 +#define NVC0_CB_AUX_SU_INFO(i) 0x420 + (i) * 16 * 4 #define NVC0_CB_AUX_SU_SIZE (NVC0_MAX_IMAGES * 16 * 4) /* 1 64-bits address and 1 32-bits sequence */ -#define NVC0_CB_AUX_MP_INFO 0x600 +#define NVC0_CB_AUX_MP_INFO 0x620 #define NVC0_CB_AUX_MP_SIZE 3 * 4 /* 4 32-bits floats for the vertex runout, put at the end */ #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6) -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH RFC 0/8] nvc0: ARB_shader_image_load_store for Maxwell
Hi folks, This series adds support for ARB_shader_image_load_store (GL 4.2) and ARB_shader_image_size (GL 4.3) on Maxwell GPUs. The Maxwell family is slightly different from Fermi and Kepler because it requires a texture view for each image. But this is actually quite fine because the underlying hardware will handle a bunch of things that we needed to do in software for previous generations. However, this series *WILL NOT ENABLE* ARB_shader_image_load_store by default for the following reasons: a) Some subtests in arb_shader_image_load_store-atomicity totally hang the GPU because there is a race condition. The code is a loop which uses the result of the surface atomic operation as the condition. The only way to fix this is to implement a sched calculator pass for Maxwell. This actually allows us to define read/write barriers like the blob does for this specific case, and this might also improve performance. b) Maxwell currently only exposes GL 3.3 because tessellation has still not been figured out (I'm working on this but it's quite hard). So enabling the extension is not going to help until this part is done. To sum up, this series adds "basic" support for images but it should work in most cases. Please review, Thanks! 
Samuel Pitoiset (8): nvc0: increase the tex handles area size in the driver cb nvc0: bind images for 3d/cp shaders on GM107+ gm107/ir: lower surface operations gm107/ra: fix constraints for surface operations gm107/ir: add emission for SUSTx and SULDx gm107/ir: add emission for SUREDx nv50/ir: print OP_SUREDB subops in debug mode nvc0: disable MS images on GM107+ .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 155 + .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 76 +- .../nouveau/codegen/nv50_ir_lowering_nvc0.h| 2 + .../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 25 +++- src/gallium/drivers/nouveau/nvc0/nvc0_context.c| 5 +- src/gallium/drivers/nouveau/nvc0/nvc0_context.h| 26 ++-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 7 + src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 10 +- src/gallium/drivers/nouveau/nvc0/nvc0_tex.c| 110 ++- src/gallium/drivers/nouveau/nvc0/nve4_compute.c| 91 ++-- 11 files changed, 476 insertions(+), 32 deletions(-) -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mapi: Massage code to allow clang to compile.
On Mon, Jul 11, 2016 at 10:49 AM, Matt Turner wrote: > According to https://llvm.org/bugs/show_bug.cgi?id=19778#c3 this code > was violating the spec, resulting in it failing to compile. > > Cc: mesa-sta...@lists.freedesktop.org > Co-authored-by: Tomasz Paweł Gajc > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89599 > --- > I've tried for months to reproduce this, and I've still never been > able to on 64-bit builds. I can reproduce it on 32-bit however. > > On MSVC, this patch will have the effect of changing the variables > from static to extern. I do not know if this will adversely affect > anything, so this patch would benefit from MSVC testing. Ping... ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 89599] symbol 'x86_64_entry_start' is already defined when building with LLVM/clang
https://bugs.freedesktop.org/show_bug.cgi?id=89599 --- Comment #11 from Matt Turner --- I sent a modified version of Tomasz's patch last week to mesa-dev. Would anyone like to test it? [PATCH] mapi: Massage code to allow clang to compile. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC] gallium/u_queue: add barrier function
On Mon, Jul 18, 2016 at 4:34 PM, Nicolai Hähnle wrote: > On 18.07.2016 22:25, Rob Clark wrote: >> >> Helper to block until all previous jobs are complete. >> --- >> So I think this might end up being useful to me in some cases.. but >> the implementation only works for a single threaded queue (which is >> all I need). I could also just put a helper in my driver code. >> >> Opinions? > > > What do you need it for? ISTR Marek had a half-finished patch for > dependencies, maybe that does what you need and is more expressive? no, I don't think dependencies would really help me.. This issue I'm chasing down is a race condition which, I think, amounts to we initially flush a batch when we don't think we need to wait for it to complete, and later get a flush_resource() or a flush() and realize we did actually need it to complete, but no longer have a ref to the batch (or its contained fence).. BR, -R > Cheers, > Nicolai > > >> >> src/gallium/auxiliary/util/u_queue.c | 12 >> src/gallium/auxiliary/util/u_queue.h | 2 ++ >> 2 files changed, 14 insertions(+) >> >> diff --git a/src/gallium/auxiliary/util/u_queue.c >> b/src/gallium/auxiliary/util/u_queue.c >> index 838464f..861faca 100644 >> --- a/src/gallium/auxiliary/util/u_queue.c >> +++ b/src/gallium/auxiliary/util/u_queue.c >> @@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue, >> pipe_condvar_signal(queue->has_queued_cond); >> pipe_mutex_unlock(queue->lock); >> } >> + >> +static void dummy_execute(void *job, int thread_index) {} >> + >> +/* blocks until all previously queued jobs complete: */ >> +void util_queue_barrier(struct util_queue *queue) >> +{ >> + struct util_queue_fence fence; >> + util_queue_fence_init(&fence); >> + util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, >> NULL); >> + util_queue_job_wait(&fence); >> + util_queue_fence_destroy(&fence); >> +} >> diff --git a/src/gallium/auxiliary/util/u_queue.h >> b/src/gallium/auxiliary/util/u_queue.h >> index 59646cc..8a22ee0 100644 
>> --- a/src/gallium/auxiliary/util/u_queue.h >> +++ b/src/gallium/auxiliary/util/u_queue.h >> @@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue, >> >> void util_queue_job_wait(struct util_queue_fence *fence); >> >> +void util_queue_barrier(struct util_queue *queue); >> + >> /* util_queue needs to be cleared to zeroes for this to work */ >> static inline bool >> util_queue_is_initialized(struct util_queue *queue) >> > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] anv: Drop unnecessary is_haswell check in state base address code.
Both branches are identical. Fixes a Coverity warning. CID: 1364155 Signed-off-by: Kenneth Graunke --- src/intel/vulkan/anv_cmd_buffer.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 6256df8..ce4266f 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -358,10 +358,7 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { switch (cmd_buffer->device->info.gen) { case 7: - if (cmd_buffer->device->info.is_haswell) - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); - else - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); case 8: return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); case 9: -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC] gallium/u_queue: add barrier function
On 18.07.2016 22:25, Rob Clark wrote: Helper to block until all previous jobs are complete. --- So I think this might end up being useful to me in some cases.. but the implementation only works for a single threaded queue (which is all I need). I could also just put a helper in my driver code. Opinions? What do you need it for? ISTR Marek had a half-finished patch for dependencies, maybe that does what you need and is more expressive? Cheers, Nicolai src/gallium/auxiliary/util/u_queue.c | 12 src/gallium/auxiliary/util/u_queue.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/src/gallium/auxiliary/util/u_queue.c b/src/gallium/auxiliary/util/u_queue.c index 838464f..861faca 100644 --- a/src/gallium/auxiliary/util/u_queue.c +++ b/src/gallium/auxiliary/util/u_queue.c @@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue, pipe_condvar_signal(queue->has_queued_cond); pipe_mutex_unlock(queue->lock); } + +static void dummy_execute(void *job, int thread_index) {} + +/* blocks until all previously queued jobs complete: */ +void util_queue_barrier(struct util_queue *queue) +{ + struct util_queue_fence fence; + util_queue_fence_init(&fence); + util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, NULL); + util_queue_job_wait(&fence); + util_queue_fence_destroy(&fence); +} diff --git a/src/gallium/auxiliary/util/u_queue.h b/src/gallium/auxiliary/util/u_queue.h index 59646cc..8a22ee0 100644 --- a/src/gallium/auxiliary/util/u_queue.h +++ b/src/gallium/auxiliary/util/u_queue.h @@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue, void util_queue_job_wait(struct util_queue_fence *fence); +void util_queue_barrier(struct util_queue *queue); + /* util_queue needs to be cleared to zeroes for this to work */ static inline bool util_queue_is_initialized(struct util_queue *queue) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/7] i965: Delete the FS_OPCODE_INTERPOLATE_AT_CENTROID virtual opcode.
We no longer use this message. As far as I can tell, it's fairly useless - the equivalent information is provided in the payload. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_defines.h| 1 - src/mesa/drivers/dri/i965/brw_fs.cpp | 2 -- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 - src/mesa/drivers/dri/i965/brw_shader.cpp | 2 -- 4 files changed, 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index b5a259e..2814fa7 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1120,7 +1120,6 @@ enum opcode { FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, FS_OPCODE_PLACEHOLDER_HALT, - FS_OPCODE_INTERPOLATE_AT_CENTROID, FS_OPCODE_INTERPOLATE_AT_SAMPLE, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 06007fe..120d6dd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -250,7 +250,6 @@ fs_inst::is_send_from_grf() const switch (opcode) { case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case SHADER_OPCODE_SHADER_TIME_ADD: - case FS_OPCODE_INTERPOLATE_AT_CENTROID: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: @@ -4785,7 +4784,6 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, case FS_OPCODE_PACK_HALF_2x16_SPLIT: case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: - case FS_OPCODE_INTERPOLATE_AT_CENTROID: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 1e9c7da..a390184 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -2054,11 +2054,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) } break; - case FS_OPCODE_INTERPOLATE_AT_CENTROID: - generate_pixel_interpolator_query(inst, dst, src[0], src[1], - GEN7_PIXEL_INTERPOLATOR_LOC_CENTROID); - break; - case FS_OPCODE_INTERPOLATE_AT_SAMPLE: generate_pixel_interpolator_query(inst, dst, src[0], src[1], GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index f3b5487..559e44c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -367,8 +367,6 @@ brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op) case FS_OPCODE_PLACEHOLDER_HALT: return "placeholder_halt"; - case FS_OPCODE_INTERPOLATE_AT_CENTROID: - return "interp_centroid"; case FS_OPCODE_INTERPOLATE_AT_SAMPLE: return "interp_sample"; case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/7] i965: Move load_interpolated_input/barycentric_* intrinsics to the top.
Currently, i965 interpolates all FS inputs at the top of the program. This has advantages and disadvantages, but I'd like to keep that policy while reworking this code. We can consider changing it independently. The next patch will make the compiler generate PLN instructions "on the fly", when it encounters an input load intrinsic, rather than doing it for all inputs at the start of the program. To emulate this behavior, we introduce an ugly pass to move all NIR load_interpolated_input and payload-based (not interpolator message) load_barycentric_* intrinsics to the shader's start block. This helps avoid regressions in shader-db for cases such as: if (...) { ...load some input... } else { ...load that same input... } which CSE can't handle, because there's no dominance relationship between the two loads. Because the start block dominates all others, we can CSE all inputs and emit PLNs exactly once, as we did before. Ideally, global value numbering would eliminate these redundant loads, while not forcing them all the way to the start block. When that lands, we should consider dropping this hacky pass. Again, this pass currently does nothing, as i965 doesn't generate these intrinsics yet. But it will shortly, and I figured I'd separate this code as it's relatively self-contained. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 78 1 file changed, 78 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index ea6616b..94127bc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -6400,6 +6400,83 @@ computed_depth_mode(const nir_shader *shader) } /** + * Move load_interpolated_input with simple (payload-based) barycentric modes + * to the top of the program so we don't emit multiple PLNs for the same input. + * + * This works around CSE not being able to handle non-dominating cases + * such as: + * + *if (...) 
{ + * interpolate input + *} else { + * interpolate the same exact input + *} + * + * This should be replaced by global value numbering someday. + */ +void +move_interpolation_to_top(nir_shader *nir) +{ + nir_foreach_function(f, nir) { + if (!f->impl) + continue; + + nir_builder b; + nir_builder_init(&b, f->impl); + b.cursor = nir_before_block(nir_start_block(f->impl)); + + nir_foreach_block(block, f->impl) { + nir_foreach_instr_safe(instr, block) { +if (instr->type != nir_instr_type_intrinsic) + continue; + +nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr); +if (load->intrinsic != nir_intrinsic_load_interpolated_input) + continue; + +nir_intrinsic_instr *bary = + nir_instr_as_intrinsic(load->src[0].ssa->parent_instr); + +/* Leave interpolateAtSample/Offset() where it is. */ +if (bary->intrinsic == nir_intrinsic_load_barycentric_at_sample || +bary->intrinsic == nir_intrinsic_load_barycentric_at_offset) + continue; + +/* Make a new load_barycentric_* intrinsic at the top */ +nir_ssa_def *top_bary = + nir_load_barycentric(&b, bary->intrinsic, +nir_intrinsic_interp_mode(bary)); + +/* Make a new load_intrinsic_input at the top */ +nir_intrinsic_instr *top_load = nir_intrinsic_instr_create(nir, + nir_intrinsic_load_interpolated_input); +top_load->num_components = load->num_components; +top_load->src[0] = nir_src_for_ssa(top_bary); +/* We don't support indirects today - otherwise we might not + * be able to move this to the top. add_const_offset_to_base + * guarantees the offset will be 0. 
+ */ +assert(nir_src_as_const_value(load->src[1]) && + nir_src_as_const_value(load->src[1])->u32[0] == 0); +top_load->src[1] = nir_src_for_ssa(nir_imm_int(&b, 0)); +top_load->const_index[0] = load->const_index[0]; +top_load->const_index[1] = load->const_index[1]; +nir_ssa_dest_init(&top_load->instr, &top_load->dest, + load->dest.ssa.num_components, + load->dest.ssa.bit_size, NULL); + +nir_ssa_def_rewrite_uses(&load->dest.ssa, + nir_src_for_ssa(&top_load->dest.ssa)); +nir_builder_instr_insert(&b, &top_load->instr); + } + } + nir_metadata_preserve(f->impl, (nir_metadata) +((unsigned) nir_metadata_block_index | + (unsigned) nir_metadata_dominance)); + } +} + +/** * Apply default interpolation settings to FS inputs which don't specify any. */ static void @@ -6506,6 +658
[Mesa-dev] [PATCH 4/7] i965: Add a pass to demote sample interpolation intrinsics.
When working with a non-multisampled render target, asking for "sample" interpolation locations doesn't make sense. We demote them to centroid. In a couple of patches, brw_compute_barycentric_modes will begin looking at these intrinsics to determine the barycentric modes. fs_visitor also will use them to code-generate pixel interpolator messages or payload references. Handling the "but what if it's not MSAA?" logic ahead of time in a NIR pass simplifies things and prevents duplicated logic. This patch doesn't actually do anything useful yet as we don't generate these intrinsics. I decided to keep it separate as it's self-contained, in the hopes of shrinking the "convert everything" patch for reviewers. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 44 1 file changed, 44 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7316247..ea6616b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -39,6 +39,7 @@ #include "brw_program.h" #include "brw_dead_control_flow.h" #include "compiler/glsl_types.h" +#include "compiler/nir/nir_builder.h" #include "program/prog_parameter.h" using namespace brw; @@ -6442,6 +6443,47 @@ brw_nir_set_default_interpolation(const struct brw_device_info *devinfo, } } +/** + * Demote per-sample barycentric intrinsics to centroid. + * + * Useful when rendering to a non-multisampled buffer. 
+ */ +static void +demote_sample_qualifiers(nir_shader *nir) +{ + nir_foreach_function(f, nir) { + if (!f->impl) + continue; + + nir_builder b; + nir_builder_init(&b, f->impl); + + nir_foreach_block(block, f->impl) { + nir_foreach_instr_safe(instr, block) { +if (instr->type != nir_instr_type_intrinsic) + continue; + +nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); +if (intrin->intrinsic != nir_intrinsic_load_barycentric_sample && +intrin->intrinsic != nir_intrinsic_load_barycentric_at_sample) + continue; + +b.cursor = nir_before_instr(instr); +nir_ssa_def *centroid = + nir_load_barycentric(&b, nir_intrinsic_load_barycentric_centroid, +nir_intrinsic_interp_mode(intrin)); +nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(centroid)); +nir_instr_remove(instr); + } + } + + nir_metadata_preserve(f->impl, (nir_metadata) +((unsigned) nir_metadata_block_index | + (unsigned) nir_metadata_dominance)); + } +} + const unsigned * brw_compile_fs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, @@ -6462,6 +6504,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, key->flat_shade, key->persample_interp); brw_nir_lower_fs_inputs(shader); brw_nir_lower_fs_outputs(shader); + if (!key->multisample_fbo) + NIR_PASS_V(shader, demote_sample_qualifiers); shader = brw_postprocess_nir(shader, compiler->devinfo, true); /* key->alpha_test_func means simulating alpha testing via discards, -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/7] nir: Add nir_load_interpolated_input lowering code.
Now nir_lower_io can optionally produce load_interpolated_input and load_barycentric_* intrinsics for fragment shader inputs. flat inputs continue using regular load_input. Signed-off-by: Kenneth Graunke --- src/compiler/nir/nir_lower_io.c | 94 ++--- 1 file changed, 89 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index aa8a517..e480264 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -174,12 +174,30 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, { nir_variable *var = intrin->variables[0]->var; nir_variable_mode mode = var->data.mode; + nir_ssa_def *barycentric = NULL; nir_intrinsic_op op; switch (mode) { case nir_var_shader_in: - op = vertex_index ? nir_intrinsic_load_per_vertex_input : - nir_intrinsic_load_input; + if (state->use_interpolated_input && + var->data.interpolation != INTERP_MODE_FLAT) { + assert(vertex_index == NULL); + + nir_intrinsic_op bary_op; + if (var->data.sample) +bary_op = nir_intrinsic_load_barycentric_sample; + else if (var->data.centroid) +bary_op = nir_intrinsic_load_barycentric_centroid; + else +bary_op = nir_intrinsic_load_barycentric_pixel; + + barycentric = nir_load_barycentric(&state->builder, bary_op, +var->data.interpolation); + op = nir_intrinsic_load_interpolated_input; + } else { + op = vertex_index ? nir_intrinsic_load_per_vertex_input : + nir_intrinsic_load_input; + } break; case nir_var_shader_out: op = vertex_index ? nir_intrinsic_load_per_vertex_output : @@ -205,10 +223,15 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, if (load->intrinsic == nir_intrinsic_load_uniform) nir_intrinsic_set_range(load, state->type_size(var->type)); - if (vertex_index) + if (vertex_index) { load->src[0] = nir_src_for_ssa(vertex_index); - - load->src[vertex_index ? 
1 : 0] = nir_src_for_ssa(offset); + load->src[1] = nir_src_for_ssa(offset); + } else if (barycentric) { + load->src[0] = nir_src_for_ssa(barycentric); + load->src[1] = nir_src_for_ssa(offset); + } else { + load->src[0] = nir_src_for_ssa(offset); + } return load; } @@ -288,6 +311,54 @@ lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state, return atomic; } +static nir_intrinsic_instr * +lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, + nir_ssa_def *offset) +{ + nir_variable *var = intrin->variables[0]->var; + + assert(var->data.mode == nir_var_shader_in); + + nir_intrinsic_op bary_op; + switch (intrin->intrinsic) { + case nir_intrinsic_interp_var_at_centroid: + bary_op = nir_intrinsic_load_barycentric_centroid; + break; + case nir_intrinsic_interp_var_at_sample: + bary_op = nir_intrinsic_load_barycentric_at_sample; + break; + case nir_intrinsic_interp_var_at_offset: + bary_op = nir_intrinsic_load_barycentric_at_offset; + break; + default: + unreachable("Bogus interpolateAt() intrinsic."); + } + + nir_intrinsic_instr *bary_setup = + nir_intrinsic_instr_create(state->mem_ctx, bary_op); + + nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL); + nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation); + + if (intrin->intrinsic != nir_intrinsic_interp_var_at_centroid) + nir_src_copy(&bary_setup->src[0], &intrin->src[0], bary_setup); + + nir_builder_instr_insert(&state->builder, &bary_setup->instr); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(state->mem_ctx, + nir_intrinsic_load_interpolated_input); + load->num_components = intrin->num_components; + + nir_intrinsic_set_base(load, var->data.driver_location); + nir_intrinsic_set_component(load, var->data.location_frac); + + load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa); + load->src[1] = nir_src_for_ssa(offset); + + return load; +} + static bool nir_lower_io_block(nir_block *block, struct lower_io_state *state) @@ -315,6 
+386,12 @@ nir_lower_io_block(nir_block *block, case nir_intrinsic_var_atomic_comp_swap: /* We can lower the io for this nir instrinsic */ break; + case nir_intrinsic_interp_var_at_centroid: + case nir_intrinsic_interp_var_at_sample: + case nir_intrinsic_interp_var_at_offset: + /* We can optionally lower these to load_interpolated_input */ + if (state->use_interpolated_input) +break; default: /* We can't lower the io for this nir instrinsic, so skip it */ continue; @@ -369,6 +446,13 @@ ni
[Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.
This eliminates the need to walk the list of input variables, recurse into their types (via logic largely redundant with nir_lower_io), and interpolate all possible inputs up front. The backend no longer has to care about variables at all, which eliminates complications from trying to pack multiple variables into the same location. Instead, each intrinsic specifies exactly what's needed. This should unblock Timothy's work on GL_ARB_enhanced_layouts. Each load_interpolated_input intrinsic corresponds to PLN instructions, while load_barycentric_at_* intrinsics correspond to pixel interpolator messages. The pixel/centroid/sample barycentric intrinsics simply refer to payload fields (delta_xy[]), and don't actually generate any code. Because we use a single intrinsic for both centroid-qualified variables and interpolateAtCentroid(), they become indistinguishable. We stop sending pixel interpolator messages for those, and instead use the payload provided data, which should be considerably faster. On Broadwell: total instructions in shared programs: 9067751 -> 9067570 (-0.00%) instructions in affected programs: 145902 -> 145721 (-0.12%) helped: 422 HURT: 209 total spills in shared programs: 2849 -> 2899 (1.76%) spills in affected programs: 760 -> 810 (6.58%) helped: 0 HURT: 10 total fills in shared programs: 3910 -> 3950 (1.02%) fills in affected programs: 617 -> 657 (6.48%) helped: 0 HURT: 10 LOST: 3 GAINED: 3 The differences mostly appear to be slight changes in MOVs. 
Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 175 - src/mesa/drivers/dri/i965/brw_fs.h | 9 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410 --- src/mesa/drivers/dri/i965/brw_nir.c | 16 +- 4 files changed, 269 insertions(+), 341 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 94127bc..06007fe 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1067,21 +1067,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg wpos) bld.MOV(wpos, this->wpos_w); } -static enum brw_barycentric_mode -barycentric_mode(enum glsl_interp_mode mode, - bool is_centroid, bool is_sample) +enum brw_barycentric_mode +brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op) { - unsigned bary; - /* Barycentric modes don't make sense for flat inputs. */ assert(mode != INTERP_MODE_FLAT); - if (is_sample) { - bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; - } else if (is_centroid) { - bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; - } else { + unsigned bary; + switch (op) { + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_at_offset: bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL; + break; + case nir_intrinsic_load_barycentric_centroid: + bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; + break; + case nir_intrinsic_load_barycentric_sample: + case nir_intrinsic_load_barycentric_at_sample: + bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; + break; + default: + assert(!"invalid intrinsic"); } if (mode == INTERP_MODE_NOPERSPECTIVE) @@ -1101,107 +1107,6 @@ centroid_to_pixel(enum brw_barycentric_mode bary) return (enum brw_barycentric_mode) ((unsigned) bary - 1); } -void -fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, - const glsl_type *type, - glsl_interp_mode interpolation_mode, - int *location, bool mod_centroid, - bool mod_sample) -{ - assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_data *prog_data = 
(brw_wm_prog_data*) this->prog_data; - - if (type->is_array() || type->is_matrix()) { - const glsl_type *elem_type = glsl_get_array_element(type); - const unsigned length = glsl_get_length(type); - - for (unsigned i = 0; i < length; i++) { - emit_general_interpolation(attr, name, elem_type, interpolation_mode, -location, mod_centroid, mod_sample); - } - } else if (type->is_record()) { - for (unsigned i = 0; i < type->length; i++) { - const glsl_type *field_type = type->fields.structure[i].type; - emit_general_interpolation(attr, name, field_type, interpolation_mode, -location, mod_centroid, mod_sample); - } - } else { - assert(type->is_scalar() || type->is_vector()); - - if (prog_data->urb_setup[*location] == -1) { - /* If there's no incoming setup data for this slot, don't - * emit interpolation for it. - */ - *attr = offset(*attr, bld, type->vector_elements); - (*location)++; - return; - } - - attr->type = brw_type_for_base_type(type->get_scalar_typ
[Mesa-dev] [PATCH 2/7] nir: Add a nir_lower_io flag for using load_interpolated_input intrinsics.
While my intention is that the new intrinsics should be usable by all drivers, we need to make them optional until all drivers switch. This doesn't do anything yet, but I added it as a separate patch to keep the interface churn separate for easier review. Signed-off-by: Kenneth Graunke --- src/compiler/nir/nir.h | 3 ++- src/compiler/nir/nir_lower_io.c | 15 +++ src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 2 +- src/mesa/drivers/dri/i965/brw_blorp.c | 2 +- src/mesa/drivers/dri/i965/brw_nir.c | 18 +- src/mesa/drivers/dri/i965/brw_program.c | 4 ++-- src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +- 7 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index ac11998..e996e0e 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2324,7 +2324,8 @@ void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, void nir_lower_io(nir_shader *shader, nir_variable_mode modes, - int (*type_size)(const struct glsl_type *)); + int (*type_size)(const struct glsl_type *), + bool use_load_interpolated_input_intrinsics); nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index b05a73f..aa8a517 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -39,6 +39,7 @@ struct lower_io_state { void *mem_ctx; int (*type_size)(const struct glsl_type *type); nir_variable_mode modes; + bool use_interpolated_input; }; void @@ -394,7 +395,8 @@ nir_lower_io_block(nir_block *block, static void nir_lower_io_impl(nir_function_impl *impl, nir_variable_mode modes, - int (*type_size)(const struct glsl_type *)) + int (*type_size)(const struct glsl_type *), + bool use_interpolated_input) { struct lower_io_state state; @@ -402,6 +404,7 @@ nir_lower_io_impl(nir_function_impl *impl, state.mem_ctx = ralloc_parent(impl); state.modes = modes; 
state.type_size = type_size; + state.use_interpolated_input = use_interpolated_input; nir_foreach_block(block, impl) { nir_lower_io_block(block, &state); @@ -413,11 +416,15 @@ nir_lower_io_impl(nir_function_impl *impl, void nir_lower_io(nir_shader *shader, nir_variable_mode modes, - int (*type_size)(const struct glsl_type *)) + int (*type_size)(const struct glsl_type *), + bool use_interpolated_input) { nir_foreach_function(function, shader) { - if (function->impl) - nir_lower_io_impl(function->impl, modes, type_size); + if (function->impl) { + nir_lower_io_impl(function->impl, modes, type_size, + use_interpolated_input && + shader->stage == MESA_SHADER_FRAGMENT); + } } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 41532fc..a8a8c1b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -93,7 +93,7 @@ load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage) // TODO nir_assign_var_locations?? 
NIR_PASS_V(nir, nir_lower_system_values); - NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size); + NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size, false); NIR_PASS_V(nir, nir_lower_samplers, prog); return nir; diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 282a5b2..0473cfe 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -209,7 +209,7 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir, unsigned end = var->data.location + nir_uniform_type_size(var->type); nir->num_uniforms = MAX2(nir->num_uniforms, end); } - nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size); + nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size, false); const unsigned *program = brw_compile_fs(compiler, brw, mem_ctx, wm_key, &wm_prog_data, nir, diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 6c3e1d1..caf9fe0 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -204,7 +204,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir, * loaded as one vec4 or dvec4 per element (or matrix column), depending on * whether it is a double-precision type or not. */ - nir_lower_io(nir, nir_var_shader_in, type_size_vs_input); + nir_lower_io(nir, nir_var_shader_in, type_siz
[Mesa-dev] [PATCH 1/7] nir: Add new intrinsics for fragment shader input interpolation.
Backends can normally handle shader inputs solely by looking at load_input intrinsics, and ignore the nir_variables in nir->inputs. One exception is fragment shader inputs. load_input doesn't capture the necessary interpolation information - flat, smooth, noperspective mode, and centroid, sample, or pixel for the location. This means that backends have to interpolate based on the nir_variables, then associate those with the load_input intrinsics (say, by storing a map of which variables are at which locations). With GL_ARB_enhanced_layouts, we're going to have multiple varyings packed into a single vec4 location. The intrinsics make this easy: simply load N components from location <loc, component>. However, working with variables and correlating the two is very awkward; we'd much rather have intrinsics capture all the necessary information. Fragment shader input interpolation typically works by producing a set of barycentric coordinates, then using those to do a linear interpolation between the values at the triangle's corners. We represent this by introducing five new load_barycentric_* intrinsics: - load_barycentric_pixel (ordinary variable) - load_barycentric_centroid (centroid qualified variable) - load_barycentric_sample (sample qualified variable) - load_barycentric_at_sample (ARB_gpu_shader5's interpolateAtSample()) - load_barycentric_at_offset (ARB_gpu_shader5's interpolateAtOffset()) Each of these takes the interpolation mode (smooth or noperspective only) as a const_index, and produces a vec2. The last two also take a sample or offset source. We then introduce a new load_interpolated_input intrinsic, which is like a normal load_input intrinsic, but with an additional barycentric coordinate source. The intention is that flat inputs will still use regular load_input intrinsics. This makes them distinguishable from normal inputs that need fancy interpolation, while also providing all the necessary data. This nicely unifies regular inputs and interpolateAt functions.
Qualifiers and variables become irrelevant; there are just load_barycentric intrinsics that determine the interpolation. Signed-off-by: Kenneth Graunke --- src/compiler/nir/nir.h| 6 ++ src/compiler/nir/nir_builder.h| 11 +++ src/compiler/nir/nir_intrinsics.h | 24 src/compiler/nir/nir_lower_io.c | 1 + src/compiler/nir/nir_print.c | 1 + 5 files changed, 43 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index c5d3b6b..ac11998 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -992,6 +992,11 @@ typedef enum { */ NIR_INTRINSIC_COMPONENT = 8, + /** +* Interpolation mode (only meaningful for FS inputs). +*/ + NIR_INTRINSIC_INTERP_MODE = 9, + NIR_INTRINSIC_NUM_INDEX_FLAGS, } nir_intrinsic_index_flag; @@ -1059,6 +1064,7 @@ INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned) INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned) INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned) INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned) +INTRINSIC_IDX_ACCESSORS(interp_mode, INTERP_MODE, unsigned) /** * \group texture information diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 09cdf72..435582a 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -458,6 +458,17 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) return &load->dest.ssa; } +static inline nir_ssa_def * +nir_load_barycentric(nir_builder *build, nir_intrinsic_op op, + unsigned interp_mode) +{ + nir_intrinsic_instr *bary = nir_intrinsic_instr_create(build->shader, op); + nir_ssa_dest_init(&bary->instr, &bary->dest, 2, 32, NULL); + nir_intrinsic_set_interp_mode(bary, interp_mode); + nir_builder_instr_insert(build, &bary->instr); + return &bary->dest.ssa; +} + static inline void nir_jump(nir_builder *build, nir_jump_type jump_type) { diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 2f74555..29917e3 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ 
b/src/compiler/nir/nir_intrinsics.h @@ -306,6 +306,27 @@ SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx) +/** + * Barycentric coordinate intrinsics. + * + * These set up the barycentric coordinates for a particular interpolation. + * The first three are for the simple cases: pixel, centroid, or per-sample + * (at gl_SampleID). The next two handle interpolating at a specified + * sample location, or interpolating with a vec2 offset, + * + * The vec2 value produced by these intrinsics is intended for use as the + * barycoord source of a load_interpolated_input intrinsic. + */ +SYSTEM_VALUE(barycentric_pixel, 2, 1, INTERP_MODE, xx, xx) +SYSTEM_VALUE(barycentric_centroid, 2, 1, INTERP_MODE, xx, xx) +SYSTEM_VALUE(barycentric_sample, 2
[Mesa-dev] [RFC] gallium/u_queue: add barrier function
Helper to block until all previous jobs are complete. --- So I think this might end up being useful to me in some cases.. but the implementation only works for a single threaded queue (which is all I need). I could also just put a helper in my driver code. Opinions? src/gallium/auxiliary/util/u_queue.c | 12 src/gallium/auxiliary/util/u_queue.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/src/gallium/auxiliary/util/u_queue.c b/src/gallium/auxiliary/util/u_queue.c index 838464f..861faca 100644 --- a/src/gallium/auxiliary/util/u_queue.c +++ b/src/gallium/auxiliary/util/u_queue.c @@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue, pipe_condvar_signal(queue->has_queued_cond); pipe_mutex_unlock(queue->lock); } + +static void dummy_execute(void *job, int thread_index) {} + +/* blocks until all previously queued jobs complete: */ +void util_queue_barrier(struct util_queue *queue) +{ + struct util_queue_fence fence; + util_queue_fence_init(&fence); + util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, NULL); + util_queue_job_wait(&fence); + util_queue_fence_destroy(&fence); +} diff --git a/src/gallium/auxiliary/util/u_queue.h b/src/gallium/auxiliary/util/u_queue.h index 59646cc..8a22ee0 100644 --- a/src/gallium/auxiliary/util/u_queue.h +++ b/src/gallium/auxiliary/util/u_queue.h @@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue, void util_queue_job_wait(struct util_queue_fence *fence); +void util_queue_barrier(struct util_queue *queue); + /* util_queue needs to be cleared to zeroes for this to work */ static inline bool util_queue_is_initialized(struct util_queue *queue) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/2] st/vdpau: use lanczos filter for scaling v2
HIGH_QUALITY_SCALING_L2 to HIGH_QUALTIY_SCALING_L9 uses lanczos filter with number representing the size of the sinc window. Signed-off-by: Nayan Deshmukh --- src/gallium/state_trackers/vdpau/mixer.c | 116 --- src/gallium/state_trackers/vdpau/query.c | 8 ++ src/gallium/state_trackers/vdpau/vdpau_private.h | 7 ++ 3 files changed, 97 insertions(+), 34 deletions(-) diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c index cb0ef03..87822c8 100644 --- a/src/gallium/state_trackers/vdpau/mixer.c +++ b/src/gallium/state_trackers/vdpau/mixer.c @@ -82,14 +82,6 @@ vlVdpVideoMixerCreate(VdpDevice device, switch (features[i]) { /* they are valid, but we doesn't support them */ case VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL_SPATIAL: - case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L2: - case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L3: - case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L4: - case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L5: - case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L6: - case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L7: - case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8: - case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9: case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE: break; @@ -112,6 +104,17 @@ vlVdpVideoMixerCreate(VdpDevice device, case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1: vmixer->bicubic.supported = true; break; + + case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L2: + case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L3: + case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L4: + case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L5: + case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L6: + case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L7: + case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8: + case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9: + vmixer->lanczos.supported = true; + break; default: goto no_params; } } @@ -209,6 +212,10 @@ 
vlVdpVideoMixerDestroy(VdpVideoMixer mixer) vl_bicubic_filter_cleanup(vmixer->bicubic.filter); FREE(vmixer->bicubic.filter); } + if (vmixer->lanczos.filter) { + vl_lanczos_filter_cleanup(vmixer->lanczos.filter); + FREE(vmixer->lanczos.filter); + } pipe_mutex_unlock(vmixer->device->mutex); DeviceReference(&vmixer->device, NULL); @@ -335,7 +342,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer, } vl_compositor_set_buffer_layer(&vmixer->cstate, compositor, layer, video_buffer, prect, NULL, deinterlace); - if(vmixer->bicubic.filter) { + if(vmixer->bicubic.filter || vmixer->lanczos.filter) { struct pipe_context *pipe; struct pipe_resource res_tmpl, *res; struct pipe_sampler_view sv_templ; @@ -389,7 +396,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer, ++layers; } - if (!vmixer->noise_reduction.filter && !vmixer->sharpness.filter && !vmixer->bicubic.filter) + if (!vmixer->noise_reduction.filter && !vmixer->sharpness.filter && !vmixer->bicubic.filter && !vmixer->lanczos.filter) vlVdpSave4DelayedRendering(vmixer->device, destination_surface, &vmixer->cstate); else { vl_compositor_render(&vmixer->cstate, compositor, surface, &dirty_area, true); @@ -408,6 +415,12 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer, RectToPipe(destination_video_rect, &rect), RectToPipe(destination_rect, &clip)); + if (vmixer->lanczos.filter) + vl_lanczos_filter_render(vmixer->lanczos.filter, + sampler_view, dst->surface, + RectToPipe(destination_video_rect, &rect), + RectToPipe(destination_rect, &clip)); + if(surface != dst->surface) { pipe_sampler_view_reference(&sampler_view, NULL); pipe_surface_reference(&surface, NULL); @@ -536,6 +549,29 @@ vlVdpVideoMixerUpdateBicubicFilter(vlVdpVideoMixer *vmixer) } /** + * Update the lanczos filter + */ +static void +vlVdpVideoMixerUpdateLanczosFilter(vlVdpVideoMixer *vmixer) +{ + assert(vmixer); + + /* if present remove the old filter first */ + if (vmixer->lanczos.filter) { + vl_lanczos_filter_cleanup(vmixer->lanczos.filter); + 
FREE(vmixer->lanczos.filter); + vmixer->lanczos.filter = NULL; + } + /* and create a new filter as needed */ + if (vmixer->lanczos.enabled) { + vmixer->lanczos.filter = MALLOC(sizeof(struct vl_lanczos_filter)); + vl_lanczos_filter_init(vmixer->lanczos.filter, vmixer->device->context, + vmixer->lanczos.size, vmixer->video_width, + vmixer->video_height); + } +} + +/**