Re: [Mesa-dev] [PATCH 4/6] i965/gen8: Correct HALIGN for AUX surfaces
On Thu, May 28, 2015 at 10:21:32AM -0700, Ben Widawsky wrote: This restriction was attempted in this commit: commit 47053464630888f819ef8cc44278f1a1220159b9 Author: Anuj Phogat anuj.pho...@gmail.com Date: Fri Feb 13 11:21:21 2015 -0800 i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT However, the commit itself doesn't achieve the desired goal as determined by the asserts which the next patch adds. mcs_mt is never a value because we're in the process of allocating the mcs_mt miptree when we get to this function. I didn't check, but perhaps this would work with blorp, however, meta clears allocate the miptree structure (which AFAICT needs the alignment also) way before it allocates using meta clears where the renderbuffer is allocated way before the aux buffer. The restriction is referenced in a few places, but the most concise one [IMO] from the spec is for Gen9. Gen8 8 loosens the restriction in that it only requires this for non-msrt surface. When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 must be used. With the code before the miptree layout flag rework (patches preceding this), accomplishing this workaround is very difficult. Cc: Anuj Phogat anuj.pho...@gmail.com Cc: Neil Roberts n...@linux.intel.com Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/brw_tex_layout.c| 16 ++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 --- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 4 +++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 72b02a2..b51c7c7 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -41,8 +41,13 @@ static unsigned int intel_horizontal_texture_alignment_unit(struct brw_context *brw, -struct intel_mipmap_tree *mt) +struct intel_mipmap_tree *mt, +uint32_t layout_flags) { + Extra newline. + if (layout_flags MIPTREE_LAYOUT_FORCE_HALIGN16) + return 16; + /** * From the Alignment Unit Size section of various specs, namely: * - Gen3 Spec: Memory Data Formats Volume, Section 1.20.1.4 @@ -91,9 +96,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, if (brw-gen = 7 mt-format == MESA_FORMAT_Z_UNORM16) return 8; - if (brw-gen == 8 mt-mcs_mt mt-num_samples = 1) - return 16; - return 4; } @@ -459,7 +461,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, } void -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) +brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt, + uint32_t layout_flags) { bool multisampled = mt-num_samples 1; bool gen6_hiz_or_stencil = false; @@ -492,8 +495,9 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) mt-align_w = 128 / mt-cpp; mt-align_h = 32; } + assert((layout_flags MIPTREE_LAYOUT_FORCE_HALIGN16) == 0); } else { - mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt); + mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt, layout_flags); mt-align_h = intel_vertical_texture_alignment_unit(brw, mt-format, multisampled); } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 75ee19a..a1ac0cf 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -475,7 +475,10 @@ intel_miptree_create_layout(struct brw_context *brw, if (layout_flags MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD) mt-array_layout = ALL_SLICES_AT_EACH_LOD; - brw_miptree_layout(brw, mt); + if (intel_miptree_is_fast_clear_capable(brw, mt)) + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + + brw_miptree_layout(brw, mt, layout_flags); if (mt-disable_aux_buffers) assert(mt-msaa_layout != INTEL_MSAA_LAYOUT_CMS); @@ -722,6 +725,7 @@ intel_miptree_create(struct brw_context *brw, if (mt-msaa_layout == INTEL_MSAA_LAYOUT_CMS) { + assert(mt-num_samples 1); if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) { intel_miptree_release(mt); return NULL; @@ -734,8 +738,10 @@ intel_miptree_create(struct brw_context *brw, * clear actually occurs. */ if (intel_is_non_msrt_mcs_tile_supported(brw, mt-tiling) - intel_miptree_is_fast_clear_capable(brw, mt)) + intel_miptree_is_fast_clear_capable(brw, mt)) { mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; + assert(brw-gen 8 || mt-align_w == 16); + }
Re: [Mesa-dev] [PATCH 5/6] i965/gen9: Set HALIGN_16 for all aux buffers
On Thu, May 28, 2015 at 10:21:33AM -0700, Ben Widawsky wrote: Just like the previous patch, but for the GEN9 constraints. Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index a1ac0cf..89030aa 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -655,6 +655,11 @@ intel_miptree_create(struct brw_context *brw, assert((layout_flags MIPTREE_LAYOUT_DISABLE_AUX) == 0); assert((layout_flags MIPTREE_LAYOUT_FOR_BO) == 0); + + if (brw-gen = 9 num_samples 1) + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + + Extra newline. mt = intel_miptree_create_layout(brw, target, format, first_level, last_level, width0, height0, depth0, num_samples, -- 2.4.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags
On Fri, May 29, 2015 at 09:32:53AM +0300, Pohjolainen, Topi wrote: On Thu, May 28, 2015 at 10:21:29AM -0700, Ben Widawsky wrote: I think pretty much everyone agrees that having more than a single bool as a function argument is bordering on a bad idea. What sucks about the current code is in several instances it's necessary to propagate these boolean selections down to lower layers of the code. This requires plumbing (mechanical, but still churn) pretty much all of the miptree functions each time. By introducing the flags paramater, it is possible to add miptree constraints very easily. I like this a lot. I have a few simple comments below. The use of this, as is already the case, is sometimes we have some information at the time we create the miptree that needs to be known all the way at the lowest levels of the create/allocation, disable_aux_buffers is currently one such example. There will be another example coming up in a few patches. Cc: Chad Versace chad.vers...@linux.intel.com Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/intel_fbo.c | 5 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 86 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 9 ++- src/mesa/drivers/dri/i965/intel_pixel_draw.c | 2 +- src/mesa/drivers/dri/i965/intel_tex.c | 8 +-- src/mesa/drivers/dri/i965/intel_tex.h | 2 +- src/mesa/drivers/dri/i965/intel_tex_image.c| 14 ++--- src/mesa/drivers/dri/i965/intel_tex_validate.c | 3 +- 8 files changed, 63 insertions(+), 66 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index aebed72..1b3a72f 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, image-height, 1, image-pitch, - true /*disable_aux_buffers*/); + MIPTREE_LAYOUT_DISABLE_AUX); if (!irb-mt) return; @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, intel_image-base.Base.Level, intel_image-base.Base.Level, width, height, depth, - true, irb-mt-num_samples, INTEL_MIPTREE_TILING_ANY, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); if (intel_miptree_wants_hiz_buffer(brw, new_mt)) { intel_miptree_alloc_hiz(brw, new_mt); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 24a5c3d..b243f8a 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw, GLuint width0, GLuint height0, GLuint depth0, -bool for_bo, GLuint num_samples, -bool force_all_slices_at_each_lod, -bool disable_aux_buffers) +uint32_t layout_flags) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); if (!mt) @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt-logical_height0 = height0; mt-logical_depth0 = depth0; mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; - mt-disable_aux_buffers = disable_aux_buffers; + mt-disable_aux_buffers = !!(layout_flags MIPTREE_LAYOUT_DISABLE_AUX); You didn't mean to have double negation (!!), did you? I actually meant that isn't layout_flags MIPTREE_LAYOUT_DISABLE_AUX enough on its own? exec_list_make_empty(mt-hiz_map); /* The cpp is bytes per (1, blockheight)-sized block for compressed @@ -422,12 +420,14 @@ intel_miptree_create_layout(struct brw_context *brw, mt-physical_height0 = height0; mt-physical_depth0 = depth0; - if (!for_bo + if (!(layout_flags MIPTREE_LAYOUT_FOR_BO) _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL (brw-must_use_separate_stencil || (brw-has_separate_stencil intel_miptree_wants_hiz_buffer(brw, mt { - const bool force_all_slices_at_each_lod = brw-gen == 6; + uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; + if (brw-gen == 6) + stencil_flags
Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags
On Thu, May 28, 2015 at 10:21:29AM -0700, Ben Widawsky wrote: I think pretty much everyone agrees that having more than a single bool as a function argument is bordering on a bad idea. What sucks about the current code is in several instances it's necessary to propagate these boolean selections down to lower layers of the code. This requires plumbing (mechanical, but still churn) pretty much all of the miptree functions each time. By introducing the flags paramater, it is possible to add miptree constraints very easily. I like this a lot. I have a few simple comments below. The use of this, as is already the case, is sometimes we have some information at the time we create the miptree that needs to be known all the way at the lowest levels of the create/allocation, disable_aux_buffers is currently one such example. There will be another example coming up in a few patches. Cc: Chad Versace chad.vers...@linux.intel.com Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/intel_fbo.c | 5 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 86 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 9 ++- src/mesa/drivers/dri/i965/intel_pixel_draw.c | 2 +- src/mesa/drivers/dri/i965/intel_tex.c | 8 +-- src/mesa/drivers/dri/i965/intel_tex.h | 2 +- src/mesa/drivers/dri/i965/intel_tex_image.c| 14 ++--- src/mesa/drivers/dri/i965/intel_tex_validate.c | 3 +- 8 files changed, 63 insertions(+), 66 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index aebed72..1b3a72f 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, image-height, 1, image-pitch, - true /*disable_aux_buffers*/); + MIPTREE_LAYOUT_DISABLE_AUX); if (!irb-mt) return; @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, intel_image-base.Base.Level, intel_image-base.Base.Level, width, height, depth, - true, irb-mt-num_samples, INTEL_MIPTREE_TILING_ANY, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); if (intel_miptree_wants_hiz_buffer(brw, new_mt)) { intel_miptree_alloc_hiz(brw, new_mt); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 24a5c3d..b243f8a 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw, GLuint width0, GLuint height0, GLuint depth0, -bool for_bo, GLuint num_samples, -bool force_all_slices_at_each_lod, -bool disable_aux_buffers) +uint32_t layout_flags) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); if (!mt) @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt-logical_height0 = height0; mt-logical_depth0 = depth0; mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; - mt-disable_aux_buffers = disable_aux_buffers; + mt-disable_aux_buffers = !!(layout_flags MIPTREE_LAYOUT_DISABLE_AUX); You didn't mean to have double negation (!!), did you? exec_list_make_empty(mt-hiz_map); /* The cpp is bytes per (1, blockheight)-sized block for compressed @@ -422,12 +420,14 @@ intel_miptree_create_layout(struct brw_context *brw, mt-physical_height0 = height0; mt-physical_depth0 = depth0; - if (!for_bo + if (!(layout_flags MIPTREE_LAYOUT_FOR_BO) _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL (brw-must_use_separate_stencil || (brw-has_separate_stencil intel_miptree_wants_hiz_buffer(brw, mt { - const bool force_all_slices_at_each_lod = brw-gen == 6; + uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; + if (brw-gen == 6) + stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD; Perhaps a separating line here, your choice. mt-stencil_mt = intel_miptree_create(brw, mt-target,
[Mesa-dev] Re: Mesa (master): mesa: Allow overriding the version of ES2+ contexts
Hi Ian, On 29.05.2015 09:00, i...@kemper.freedesktop.org (Ian Romanick) wrote: Module: Mesa Branch: master Commit: 9b5e92f4ccc6ee1cb9caea947f6efaad2b391cf1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b5e92f4ccc6ee1cb9caea947f6efaad2b391cf1 Author: Ian Romanick ian.d.roman...@intel.com Date: Wed Apr 29 16:12:40 2015 -0700 mesa: Allow overriding the version of ES2+ contexts Signed-off-by: Ian Romanick ian.d.roman...@intel.com Reviewed-by: Tapani Pälli tapani.pa...@intel.com This commit broke a bunch of GLES related piglit tests for me with radeonsi, e.g. egl-create-context-default-major-version-gles: Unexpected GLES version: 93847553.6 Mesa 10.7.0-devel (git-9b5e92f) Expected GLES 1.0 or 1.1. PIGLIT: {result: fail } The numbers before Mesa in the version string seem random, and sometimes the test happens to pass. While bisecting this, I noticed that there are some possibly related compiler warnings in src/mesa/main/version.c and src/mesa/drivers/dri/common/dri_util.c. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5] i915/aa: fixing anti-aliasing bug for thinnest width lines
On PNV platform, for 1 pixel line thickness or less, the general anti-aliasing algorithm gives up, and a garbage line is generated. Setting a Line Width of 0.0 specifies the rasterization of the thinnest (one-pixel-wide), non-antialiased lines. Lines rendered with zero Line Width are rasterized using Grid Intersection Quantization rules as specified by bspec G45: Volume 2: 3D/Media, 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section. This patch follow the same rules as patches fixing the https://bugs.freedesktop.org/show_bug.cgi?id=28832 bug. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90367 Signed-off-by: Marius Predut marius.pre...@intel.com --- src/mesa/drivers/dri/i915/i915_state.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 5f10b84..6cd342c 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -599,6 +599,21 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf) width = (int) (widthf * 2); width = CLAMP(width, 1, 0xf); + + if (ctx-Line.Width 1.5 || widthf 1.5) { +/* For 1 pixel line thickness or less, the general + * anti-aliasing algorithm gives up, and a garbage line is + * generated. Setting a Line Width of 0.0 specifies the + * rasterization of the thinnest (one-pixel-wide), + * non-antialiased lines. + * + * Lines rendered with zero Line Width are rasterized using + * Grid Intersection Quantization rules as specified by + * bspec G45: Volume 2: 3D/Media, + * 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section + */ +width = 0; + } lis4 |= width S4_LINE_WIDTH_SHIFT; if (lis4 != i915-state.Ctx[I915_CTXREG_LIS4]) { -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallivm: Use the LLVM's C disassembly interface.
On 28/05/15 19:26, Roland Scheidegger wrote: Looks ok, it's massively simpler and shouldn't break as often. Reviewed-by: Roland Scheidegger srol...@vmware.com Am 28.05.2015 um 17:57 schrieb Jose Fonseca: It doesn't do everything we want. In particular it doesn't allow to detect jumps or return opcodes. Currently we detect the x86's RET opcode. Even though it's worse for LLVM 3.3, it's an improvement for LLVM 3.7, which was totally busted. --- scons/llvm.py | 4 +- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 260 - 2 files changed, 40 insertions(+), 224 deletions(-) diff --git a/scons/llvm.py b/scons/llvm.py index 17278df..c59b8cb 100644 --- a/scons/llvm.py +++ b/scons/llvm.py @@ -120,6 +120,7 @@ def generate(env): ]) elif llvm_version = distutils.version.LooseVersion('3.5'): env.Prepend(LIBS = [ +'LLVMMCDisassembler', 'LLVMBitWriter', 'LLVMMCJIT', 'LLVMRuntimeDyld', 'LLVMX86Disassembler', 'LLVMX86AsmParser', 'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter', 'LLVMX86Desc', @@ -132,6 +133,7 @@ def generate(env): ]) else: env.Prepend(LIBS = [ +'LLVMMCDisassembler', 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser', 'LLVMX86CodeGen', 'LLVMX86Desc', 'LLVMSelectionDAG', 'LLVMAsmPrinter', 'LLVMMCParser', 'LLVMX86AsmPrinter', @@ -189,7 +191,7 @@ def generate(env): if '-fno-rtti' in cxxflags: env.Append(CXXFLAGS = ['-fno-rtti']) -components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter'] +components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter', 'mcdisassembler'] env.ParseConfig('llvm-config --libs ' + ' '.join(components)) env.ParseConfig('llvm-config --ldflags') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 76c302f..64fb044 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -28,40 +28,12 @@ #include stddef.h #include llvm-c/Core.h -#include llvm/Target/TargetMachine.h -#include llvm/Target/TargetInstrInfo.h +#include llvm-c/Disassembler.h #include llvm/Support/raw_ostream.h #include llvm/Support/Format.h - -#if HAVE_LLVM = 0x0306 -#include llvm/Target/TargetSubtargetInfo.h -#else -#include llvm/Support/MemoryObject.h -#endif - -#include llvm/Support/TargetRegistry.h -#include llvm/MC/MCSubtargetInfo.h - #include llvm/Support/Host.h - #include llvm/IR/Module.h -#include llvm/MC/MCDisassembler.h -#include llvm/MC/MCAsmInfo.h -#include llvm/MC/MCInst.h -#include llvm/MC/MCInstPrinter.h -#include llvm/MC/MCRegisterInfo.h - -#if HAVE_LLVM = 0x0305 -#define OwningPtr std::unique_ptr -#else -#include llvm/ADT/OwningPtr.h -#endif - -#if HAVE_LLVM = 0x0305 -#include llvm/MC/MCContext.h -#endif - #include util/u_math.h #include util/u_debug.h @@ -133,7 +105,7 @@ lp_get_module_id(LLVMModuleRef module) extern C void lp_debug_dump_value(LLVMValueRef value) { -#if (defined(PIPE_OS_WINDOWS) !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBDDED) +#if (defined(PIPE_OS_WINDOWS) !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBEDDED) raw_debug_ostream os; llvm::unwrap(value)-print(os); os.flush(); @@ -143,44 +115,16 @@ lp_debug_dump_value(LLVMValueRef value) } -#if HAVE_LLVM 0x0306 - -/* - * MemoryObject wrapper around a buffer of memory, to be used by MC - * disassembler. - */ -class BufferMemoryObject: - public llvm::MemoryObject +static const char * +disassemblerSymbolLookupCB(void *DisInfo, + uint64_t ReferenceValue, + uint64_t *ReferenceType, + uint64_t ReferencePC, + const char **ReferenceName) { -private: - const uint8_t *Bytes; - uint64_t Length; -public: - BufferMemoryObject(const uint8_t *bytes, uint64_t length) : - Bytes(bytes), Length(length) - { - } - - uint64_t getBase() const - { - return 0; - } - - uint64_t getExtent() const - { - return Length; - } - - int readByte(uint64_t addr, uint8_t *byte) const - { - if (addr getExtent()) - return -1; - *byte = Bytes[addr]; - return 0; - } -}; - -#endif /* HAVE_LLVM 0x0306 */ + // TODO: Maybe this can be used to guess jumps + return NULL; +} /* @@ -193,8 +137,6 @@ public: static size_t disassemble(const void* func, llvm::raw_ostream Out) { - using namespace llvm; - const uint8_t *bytes = (const uint8_t *)func; /* @@ -202,101 +144,23 @@ disassemble(const void* func, llvm::raw_ostream Out) */ const uint64_t extent = 96 * 1024; - uint64_t max_pc = 0; - /* * Initialize all used
Re: [Mesa-dev] Ideas on loop unrolling, loop examples, and my GSoC-blog
Hi, On 05/28/2015 10:19 PM, Thomas Helland wrote: One more thing; Is there a limit where the loop body gets so large that we want to decide that gah, this sucks, no point in unrolling this? I imagine as the loops get large there will be a case of diminishing returns from the unrolling? I think only backend can say something to that. You e.g. give backend unrolled and non-unrolled versions and backend decides which one is better (after applying additional optimizations)... - Eero ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] xdemos/corender: Remove.
Rendering from multiple processes into the same X window is not something that works in practice anymore. Nowadays interprocess rendering is better done with GLX/EGL extensions for that effect. https://bugs.freedesktop.org/show_bug.cgi?id=30279 --- src/xdemos/.gitignore | 1 - src/xdemos/CMakeLists.txt | 3 +- src/xdemos/Makefile.am| 6 - src/xdemos/corender.c | 400 -- src/xdemos/ipc.c | 264 -- src/xdemos/ipc.h | 16 -- 6 files changed, 1 insertion(+), 689 deletions(-) delete mode 100644 src/xdemos/corender.c delete mode 100644 src/xdemos/ipc.c delete mode 100644 src/xdemos/ipc.h diff --git a/src/xdemos/.gitignore b/src/xdemos/.gitignore index 41f2519..7ad9449 100644 --- a/src/xdemos/.gitignore +++ b/src/xdemos/.gitignore @@ -1,4 +1,3 @@ -corender glsync glthreads glxcontexts diff --git a/src/xdemos/CMakeLists.txt b/src/xdemos/CMakeLists.txt index 97329fe..d49a6e6 100644 --- a/src/xdemos/CMakeLists.txt +++ b/src/xdemos/CMakeLists.txt @@ -70,8 +70,7 @@ target_link_libraries (${subdir}_pbdemo pbutil) target_link_libraries (${subdir}_pbinfo pbutil) target_link_libraries (${subdir}_sharedtex_mt pthread) -add_executable (corender corender.c ipc.c) add_executable (xrotfontdemo xrotfontdemo.c xuserotfont.c) add_executable (glxinfo glxinfo.c glinfo_common.c) -install (TARGETS glxinfo corender xrotfontdemo DESTINATION demos) +install (TARGETS glxinfo xrotfontdemo DESTINATION demos) diff --git a/src/xdemos/Makefile.am b/src/xdemos/Makefile.am index cfd23b1..625b278 100644 --- a/src/xdemos/Makefile.am +++ b/src/xdemos/Makefile.am @@ -34,7 +34,6 @@ if HAVE_X11 noinst_LTLIBRARIES = libpbutil.la bin_PROGRAMS = \ - corender \ glsync \ glthreads \ glxdemo \ @@ -67,11 +66,6 @@ libpbutil_la_SOURCES = \ pbutil.c \ pbutil.h -corender_SOURCES = \ - corender.c \ - ipc.c \ - ipc.h - xrotfontdemo_SOURCES = \ xrotfontdemo.c \ xuserotfont.c \ diff --git a/src/xdemos/corender.c b/src/xdemos/corender.c deleted file mode 100644 index e706f4b..000 --- a/src/xdemos/corender.c +++ /dev/null @@ -1,400 +0,0 @@ -/** - * Example of cooperative rendering into one window by two processes. - * The first instance of the program creates the GLX window. - * The second instance of the program gets the window ID from the first - * and draws into it. - * Socket IPC is used for synchronization. - * - * Usage: - * 1. run 'corender ' - * 2. run 'corender 2' (any arg will do) - * - * Brian Paul - * 11 Oct 2007 - */ - - -#include GL/gl.h -#include GL/glx.h -#include assert.h -#include math.h -#include stdio.h -#include stdlib.h -#include X11/keysym.h -#include unistd.h -#include ipc.h - - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - -static int MyID = 0; /* 0 or 1 */ -static int WindowID = 0; -static GLXContext Context = 0; -static int Width = 700, Height = 350; -static int Rot = 0; -static int Sock = 0; - -static GLfloat Red[4] = {1.0, 0.2, 0.2, 1.0}; -static GLfloat Blue[4] = {0.2, 0.2, 1.0, 1.0}; - -static int Sync = 1; /** synchronized rendering? */ - - -static void -setup_ipc(void) -{ - int k, port = 10001; - - if (MyID == 0) { - /* I'm the first one, wait for connection from second */ - k = CreatePort(port); - assert(k != -1); - - printf(Waiting for connection from another 'corender'\n); - Sock = AcceptConnection(k); - assert(Sock != -1); - - printf(Got connection, sending windowID\n); - - /* send windowID */ - SendData(Sock, WindowID, sizeof(WindowID)); - } - else { - /* I'm the second one, connect to first */ - char hostname[1000]; - - MyHostName(hostname, 1000); - Sock = Connect(hostname, port); - assert(Sock != -1); - - /* get windowID */ - ReceiveData(Sock, WindowID, sizeof(WindowID)); - printf(Contacted first 'corender', getting WindowID\n); - } -} - - - -/** from GLUT */ -static void -doughnut(GLfloat r, GLfloat R, GLint nsides, GLint rings) -{ - int i, j; - GLfloat theta, phi, theta1; - GLfloat cosTheta, sinTheta; - GLfloat cosTheta1, sinTheta1; - GLfloat ringDelta, sideDelta; - - ringDelta = 2.0 * M_PI / rings; - sideDelta = 2.0 * M_PI / nsides; - - theta = 0.0; - cosTheta = 1.0; - sinTheta = 0.0; - for (i = rings - 1; i = 0; i--) { -theta1 = theta + ringDelta; -cosTheta1 = cos(theta1); -sinTheta1 = sin(theta1); -glBegin(GL_QUAD_STRIP); -phi = 0.0; -for (j = nsides; j = 0; j--) { - GLfloat cosPhi, sinPhi, dist; - - phi += sideDelta; - cosPhi = cos(phi); - sinPhi = sin(phi); - dist = R + r * cosPhi; - - glNormal3f(cosTheta1 * cosPhi, -sinTheta1 * cosPhi, sinPhi); - glVertex3f(cosTheta1 * dist, -sinTheta1 * dist, r * sinPhi); - glNormal3f(cosTheta * cosPhi, -sinTheta * cosPhi, sinPhi); - glVertex3f(cosTheta * dist, -sinTheta * dist,
Re: [Mesa-dev] [PATCH v5] i915/aa: fixing anti-aliasing bug for thinnest width lines
Sorry , it is v1 not v5 ( for Pineview platform : http://ark.intel.com/products/codename/32201/Pineview) -Original Message- From: Predut, Marius Sent: Friday, May 29, 2015 2:51 PM To: mesa-dev@lists.freedesktop.org Cc: Predut, Marius Subject: [Mesa-dev][PATCH v5] i915/aa: fixing anti-aliasing bug for thinnest width lines On PNV platform, for 1 pixel line thickness or less, the general anti-aliasing algorithm gives up, and a garbage line is generated. Setting a Line Width of 0.0 specifies the rasterization of the thinnest (one-pixel-wide), non-antialiased lines. Lines rendered with zero Line Width are rasterized using Grid Intersection Quantization rules as specified by bspec G45: Volume 2: 3D/Media, 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section. This patch follow the same rules as patches fixing the https://bugs.freedesktop.org/show_bug.cgi?id=28832 bug. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90367 Signed-off-by: Marius Predut marius.pre...@intel.com --- src/mesa/drivers/dri/i915/i915_state.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 5f10b84..6cd342c 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -599,6 +599,21 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf) width = (int) (widthf * 2); width = CLAMP(width, 1, 0xf); + + if (ctx-Line.Width 1.5 || widthf 1.5) { +/* For 1 pixel line thickness or less, the general + * anti-aliasing algorithm gives up, and a garbage line is + * generated. Setting a Line Width of 0.0 specifies the + * rasterization of the thinnest (one-pixel-wide), + * non-antialiased lines. + * + * Lines rendered with zero Line Width are rasterized using + * Grid Intersection Quantization rules as specified by + * bspec G45: Volume 2: 3D/Media, + * 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section + */ +width = 0; + } lis4 |= width S4_LINE_WIDTH_SHIFT; if (lis4 != i915-state.Ctx[I915_CTXREG_LIS4]) { -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 30279] corender broken on llvmpipe and swrast
https://bugs.freedesktop.org/show_bug.cgi?id=30279 José Fonseca jfons...@vmware.com changed: What|Removed |Added Status|NEW |RESOLVED CC||bri...@vmware.com, ||jfons...@vmware.com, ||srol...@vmware.com Resolution|--- |WONTFIX --- Comment #4 from José Fonseca jfons...@vmware.com --- This is just a too weird use case to be worth our while. Particularly for SW renderers, which end up doing some sort of XPutImage for presents, hence can't actually share the buffers between processes. Even on native NVIDIA corender doesn't work properly -- the blue donut flickers. Probably the only setup corender might still work is with indirect GLX, which few people if any care anymore. I think we should actually drop the corender demo from Mesa demos -- it's not soemthing we want people to replicate. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] mesa: remove obsolete and broken sampler code
The warning is now handled earlier in the ast to hir code, and the name was only generated for arrays of arrays in which case this just breaks the Uniform hash lookup. --- src/mesa/program/sampler.cpp | 38 +- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index ea3024d..34567d2 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -38,14 +38,12 @@ class get_sampler_name : public ir_hierarchical_visitor { public: - get_sampler_name(ir_dereference *last, - struct gl_shader_program *shader_program) + get_sampler_name(struct gl_shader_program *shader_program) { this-mem_ctx = ralloc_context(NULL); this-shader_program = shader_program; this-name = NULL; this-offset = 0; - this-last = last; } ~get_sampler_name() @@ -68,29 +66,20 @@ public: virtual ir_visitor_status visit_leave(ir_dereference_array *ir) { ir_constant *index = ir-array_index-as_constant(); - int i; + + /* GLSL 1.10 and 1.20 allowed variable sampler array indices, + * while GLSL 1.30 requires that the array indices be + * constant integer expressions. We don't expect any driver + * to actually work with a really variable array index, so + * all that would work would be an unrolled loop counter so + * the index must be constant at this point. + */ + assert(index != NULL); if (index) { -i = index-value.i[0]; - } else { -/* GLSL 1.10 and 1.20 allowed variable sampler array indices, - * while GLSL 1.30 requires that the array indices be - * constant integer expressions. We don't expect any driver - * to actually work with a really variable array index, so - * all that would work would be an unrolled loop counter that ends - * up being constant above. - */ -ralloc_strcat(shader_program-InfoLog, - warning: Variable sampler array index unsupported.\n - This feature of the language was removed in GLSL 1.20 - and is unlikely to be supported for 1.10 in Mesa.\n); -i = 0; - } - if (ir != last) { -this-name = ralloc_asprintf(mem_ctx, %s[%d], name, i); - } else { -offset = i; +offset = index-value.i[0]; } + return visit_continue; } @@ -98,7 +87,6 @@ public: const char *name; void *mem_ctx; int offset; - ir_dereference *last; }; @@ -107,7 +95,7 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler, struct gl_shader_program *shader_program, const struct gl_program *prog) { - get_sampler_name getname(sampler, shader_program); + get_sampler_name getname(shader_program); GLuint shader = _mesa_program_enum_to_shader_stage(prog-Target); -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallivm: Remove stub disassemblerSymbolLookupCB.
It's incompletete -- it wasn't filling ReferenceType so it was causing garbagge on the disassembly. Furthermore it seems impossible to get the jump information through this interface. The solution for function size problem is to effectively book-keep the machine code start and end address while JIT'ing. --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 14 +- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 64fb044..9a85248 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -115,18 +115,6 @@ lp_debug_dump_value(LLVMValueRef value) } -static const char * -disassemblerSymbolLookupCB(void *DisInfo, - uint64_t ReferenceValue, - uint64_t *ReferenceType, - uint64_t ReferencePC, - const char **ReferenceName) -{ - // TODO: Maybe this can be used to guess jumps - return NULL; -} - - /* * Disassemble a function, using the LLVM MC disassembler. * @@ -149,7 +137,7 @@ disassemble(const void* func, llvm::raw_ostream Out) */ std::string Triple = llvm::sys::getProcessTriple(); - LLVMDisasmContextRef D = LLVMCreateDisasm(Triple.c_str(), NULL, 0, NULL, disassemblerSymbolLookupCB); + LLVMDisasmContextRef D = LLVMCreateDisasm(Triple.c_str(), NULL, 0, NULL, NULL); char outline[1024]; if (!D) { -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] xdemos/corender: Remove.
Reviewed-by: Brian Paul bri...@vmware.com On 05/29/2015 05:10 AM, Jose Fonseca wrote: Rendering from multiple processes into the same X window is not something that works in practice anymore. Nowadays interprocess rendering is better done with GLX/EGL extensions for that effect. https://bugs.freedesktop.org/show_bug.cgi?id=30279 --- src/xdemos/.gitignore | 1 - src/xdemos/CMakeLists.txt | 3 +- src/xdemos/Makefile.am| 6 - src/xdemos/corender.c | 400 -- src/xdemos/ipc.c | 264 -- src/xdemos/ipc.h | 16 -- 6 files changed, 1 insertion(+), 689 deletions(-) delete mode 100644 src/xdemos/corender.c delete mode 100644 src/xdemos/ipc.c delete mode 100644 src/xdemos/ipc.h diff --git a/src/xdemos/.gitignore b/src/xdemos/.gitignore index 41f2519..7ad9449 100644 --- a/src/xdemos/.gitignore +++ b/src/xdemos/.gitignore @@ -1,4 +1,3 @@ -corender glsync glthreads glxcontexts diff --git a/src/xdemos/CMakeLists.txt b/src/xdemos/CMakeLists.txt index 97329fe..d49a6e6 100644 --- a/src/xdemos/CMakeLists.txt +++ b/src/xdemos/CMakeLists.txt @@ -70,8 +70,7 @@ target_link_libraries (${subdir}_pbdemo pbutil) target_link_libraries (${subdir}_pbinfo pbutil) target_link_libraries (${subdir}_sharedtex_mt pthread) -add_executable (corender corender.c ipc.c) add_executable (xrotfontdemo xrotfontdemo.c xuserotfont.c) add_executable (glxinfo glxinfo.c glinfo_common.c) -install (TARGETS glxinfo corender xrotfontdemo DESTINATION demos) +install (TARGETS glxinfo xrotfontdemo DESTINATION demos) diff --git a/src/xdemos/Makefile.am b/src/xdemos/Makefile.am index cfd23b1..625b278 100644 --- a/src/xdemos/Makefile.am +++ b/src/xdemos/Makefile.am @@ -34,7 +34,6 @@ if HAVE_X11 noinst_LTLIBRARIES = libpbutil.la bin_PROGRAMS = \ - corender \ glsync \ glthreads \ glxdemo \ @@ -67,11 +66,6 @@ libpbutil_la_SOURCES = \ pbutil.c \ pbutil.h -corender_SOURCES = \ - corender.c \ - ipc.c \ - ipc.h - xrotfontdemo_SOURCES = \ xrotfontdemo.c \ xuserotfont.c \ diff --git a/src/xdemos/corender.c b/src/xdemos/corender.c deleted file mode 100644 index e706f4b..000 --- a/src/xdemos/corender.c +++ /dev/null @@ -1,400 +0,0 @@ -/** - * Example of cooperative rendering into one window by two processes. - * The first instance of the program creates the GLX window. - * The second instance of the program gets the window ID from the first - * and draws into it. - * Socket IPC is used for synchronization. - * - * Usage: - * 1. run 'corender ' - * 2. run 'corender 2' (any arg will do) - * - * Brian Paul - * 11 Oct 2007 - */ - - -#include GL/gl.h -#include GL/glx.h -#include assert.h -#include math.h -#include stdio.h -#include stdlib.h -#include X11/keysym.h -#include unistd.h -#include ipc.h - - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - -static int MyID = 0; /* 0 or 1 */ -static int WindowID = 0; -static GLXContext Context = 0; -static int Width = 700, Height = 350; -static int Rot = 0; -static int Sock = 0; - -static GLfloat Red[4] = {1.0, 0.2, 0.2, 1.0}; -static GLfloat Blue[4] = {0.2, 0.2, 1.0, 1.0}; - -static int Sync = 1; /** synchronized rendering? */ - - -static void -setup_ipc(void) -{ - int k, port = 10001; - - if (MyID == 0) { - /* I'm the first one, wait for connection from second */ - k = CreatePort(port); - assert(k != -1); - - printf(Waiting for connection from another 'corender'\n); - Sock = AcceptConnection(k); - assert(Sock != -1); - - printf(Got connection, sending windowID\n); - - /* send windowID */ - SendData(Sock, WindowID, sizeof(WindowID)); - } - else { - /* I'm the second one, connect to first */ - char hostname[1000]; - - MyHostName(hostname, 1000); - Sock = Connect(hostname, port); - assert(Sock != -1); - - /* get windowID */ - ReceiveData(Sock, WindowID, sizeof(WindowID)); - printf(Contacted first 'corender', getting WindowID\n); - } -} - - - -/** from GLUT */ -static void -doughnut(GLfloat r, GLfloat R, GLint nsides, GLint rings) -{ - int i, j; - GLfloat theta, phi, theta1; - GLfloat cosTheta, sinTheta; - GLfloat cosTheta1, sinTheta1; - GLfloat ringDelta, sideDelta; - - ringDelta = 2.0 * M_PI / rings; - sideDelta = 2.0 * M_PI / nsides; - - theta = 0.0; - cosTheta = 1.0; - sinTheta = 0.0; - for (i = rings - 1; i = 0; i--) { -theta1 = theta + ringDelta; -cosTheta1 = cos(theta1); -sinTheta1 = sin(theta1); -glBegin(GL_QUAD_STRIP); -phi = 0.0; -for (j = nsides; j = 0; j--) { - GLfloat cosPhi, sinPhi, dist; - - phi += sideDelta; - cosPhi = cos(phi); - sinPhi = sin(phi); - dist = R + r * cosPhi; - - glNormal3f(cosTheta1 * cosPhi, -sinTheta1 * cosPhi, sinPhi); - glVertex3f(cosTheta1 * dist, -sinTheta1 * dist, r * sinPhi); -
[Mesa-dev] [PATCH 3/3] mesa: remove unused function declaration
--- src/mesa/main/uniforms.h | 4 1 file changed, 4 deletions(-) diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h index 55fa235..bd7b05e 100644 --- a/src/mesa/main/uniforms.h +++ b/src/mesa/main/uniforms.h @@ -343,10 +343,6 @@ void GLAPIENTRY _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); -long -_mesa_parse_program_resource_name(const GLchar *name, - const GLchar **out_base_name_end); - unsigned _mesa_get_uniform_location(struct gl_shader_program *shProg, const GLchar *name, unsigned *offset); -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] nir: remove recreated broken sampler logic
This logic was recreated based on the old glsl ir code. The name was only generated for arrays of arrays in which case this just breaks the Uniform hash lookup. --- src/glsl/nir/nir_lower_samplers.cpp | 11 ++- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 7a0b0a0..b054afa 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -71,15 +71,8 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr nir_deref_array *deref_array = nir_deref_as_array(deref-child); assert(deref_array-deref_array_type != nir_deref_array_type_wildcard); - - if (deref_array-deref.child) { -ralloc_asprintf_append(name, [%u], - deref_array-deref_array_type == nir_deref_array_type_direct ? - deref_array-base_offset : 0); - } else { -assert(deref-child-type-base_type == GLSL_TYPE_SAMPLER); -instr-sampler_index = deref_array-base_offset; - } + assert(deref-child-type-base_type == GLSL_TYPE_SAMPLER); + instr-sampler_index = deref_array-base_offset; /* XXX: We're assuming here that the indirect is the last array * thing we have. This should be ok for now as we don't support -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/5] mesa: remove unused geometry shader variables
For the series, Reviewed-by: Brian Paul bri...@vmware.com On 05/28/2015 05:57 PM, Marek Olšák wrote: From: Marek Olšák marek.ol...@amd.com These states are for GS assembly shaders only. We don't support those. --- src/mesa/main/context.c| 1 - src/mesa/main/mtypes.h | 7 --- src/mesa/main/shared.c | 1 - src/mesa/program/program.c | 9 - 4 files changed, 18 deletions(-) diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index e4faf3d..483e9b1 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1333,7 +1333,6 @@ _mesa_free_context_data( struct gl_context *ctx ) _mesa_reference_vertprog(ctx, ctx-VertexProgram._Current, NULL); _mesa_reference_vertprog(ctx, ctx-VertexProgram._TnlProgram, NULL); - _mesa_reference_geomprog(ctx, ctx-GeometryProgram.Current, NULL); _mesa_reference_geomprog(ctx, ctx-GeometryProgram._Current, NULL); _mesa_reference_fragprog(ctx, ctx-FragmentProgram.Current, NULL); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 8342517..96ef060 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2275,16 +2275,10 @@ struct gl_vertex_program_state */ struct gl_geometry_program_state { - GLboolean Enabled; /** GL_ARB_GEOMETRY_SHADER4 */ - GLboolean _Enabled; /** Enabled and valid program? */ - struct gl_geometry_program *Current; /** user-bound geometry program */ - /** Currently enabled and valid program (including internal programs * and compiled shader programs). */ struct gl_geometry_program *_Current; - - GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /** Env params */ }; /** @@ -3004,7 +2998,6 @@ struct gl_shared_state struct _mesa_HashTable *Programs; /** All vertex/fragment programs */ struct gl_vertex_program *DefaultVertexProgram; struct gl_fragment_program *DefaultFragmentProgram; - struct gl_geometry_program *DefaultGeometryProgram; /*@}*/ /* GL_ATI_fragment_shader */ diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index 0b76cc0..d5ac9f1 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -313,7 +313,6 @@ free_shared_state(struct gl_context *ctx, struct gl_shared_state *shared) _mesa_DeleteHashTable(shared-Programs); _mesa_reference_vertprog(ctx, shared-DefaultVertexProgram, NULL); - _mesa_reference_geomprog(ctx, shared-DefaultGeometryProgram, NULL); _mesa_reference_fragprog(ctx, shared-DefaultFragmentProgram, NULL); _mesa_HashDeleteAll(shared-ATIShaders, delete_fragshader_cb, ctx); diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index fb61f4d..f0a47ac 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -97,11 +97,6 @@ _mesa_init_program(struct gl_context *ctx) assert(ctx-FragmentProgram.Current); ctx-FragmentProgram.Cache = _mesa_new_program_cache(); - ctx-GeometryProgram.Enabled = GL_FALSE; - /* right now by default we don't have a geometry program */ - _mesa_reference_geomprog(ctx, ctx-GeometryProgram.Current, -NULL); - _mesa_reference_compprog(ctx, ctx-ComputeProgram.Current, NULL); /* XXX probably move this stuff */ @@ -122,7 +117,6 @@ _mesa_free_program_data(struct gl_context *ctx) _mesa_delete_program_cache(ctx, ctx-VertexProgram.Cache); _mesa_reference_fragprog(ctx, ctx-FragmentProgram.Current, NULL); _mesa_delete_shader_cache(ctx, ctx-FragmentProgram.Cache); - _mesa_reference_geomprog(ctx, ctx-GeometryProgram.Current, NULL); _mesa_reference_compprog(ctx, ctx-ComputeProgram.Current, NULL); /* XXX probably move this stuff */ @@ -153,9 +147,6 @@ _mesa_update_default_objects_program(struct gl_context *ctx) ctx-Shared-DefaultFragmentProgram); assert(ctx-FragmentProgram.Current); - _mesa_reference_geomprog(ctx, ctx-GeometryProgram.Current, - ctx-Shared-DefaultGeometryProgram); - /* XXX probably move this stuff */ if (ctx-ATIFragmentShader.Current) { ctx-ATIFragmentShader.Current-RefCount--; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] i965: Don't add base_binding_table_index if it's zero
When calculating the binding table index for non-constant sampler array indexing it needs to add the base binding table index which is a constant within the generated code. Often this base is zero so we can avoid a redundant instruction in that case. It looks like nothing in shader-db is doing non-constant sampler array indexing so this patch doesn't make any difference but it might be worth having anyway. --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index ea46b1a..40a3db3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -789,7 +789,8 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src /* addr = ((sampler * 0x101) + base_binding_table_index) 0xfff */ brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); - brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + if (base_binding_table_index) + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 1d3f5ed..cf1aa83 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -409,7 +409,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, /* addr = ((sampler * 0x101) + base_binding_table_index) 0xfff */ brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); - brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + if (base_binding_table_index) + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); -- 1.9.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] PIPE_SHADER_* vs TGSI_PROCESSOR_*
On 05/28/2015 05:36 PM, Marek Olšák wrote: Hi, Would people be okay with removing the TGSI_PROCESSOR_ enums and replacing all usages with PIPE_SHADER_*? It's kind of nice that the TGSI shader stuff is self-contained with no real dependency on gallium. This would change that in a tiny way. Is this causing pain? Would adjusting the TSGI/PIPE enum values so they match (VS/FS are mixed up) be of any help? -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] i965: Don't use a temporary when generating an indirect sample
Previously when generating the send instruction for a sample instruction with an indirect sampler it would use the destination register as a temporary store. This breaks when used in combination with the opt_sampler_eot optimisation because that forces the destination to be null. This patch fixes that by avoiding the temp register altogether. The reason the temporary register was needed was because it was trying to ensure the binding table index doesn't overflow a byte by and'ing it with 0xff. The result is then or'd with samper_index8. This patch instead just and's the whole thing by 0xfff. This will ensure that a bogus sampler index won't overflow into the rest of the message descriptor but unlike the previous code it won't ensure that the binding table index doesn't overflow into the sampler index. It doesn't seem like that should matter very much though because if the shader is generating a bogus sampler index then it's going to just get garbage out either way. Instead of doing sampler_index8|(sampler_index+base_table_index) the new code avoids one operation by doing sampler_index*0x101+base_table_index which should be equivalent. However if we wanted to avoid the multiply for some reason we could do this by adding an extra or instruction still without needing the temporary register. This fixes a number of Piglit tests on Skylake that were using indirect samplers such as: spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 17 - src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 0be0f86..ea46b1a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src brw_mark_surface_used(prog_data, sampler + base_binding_table_index); } else { /* Non-const sampler index */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* Some care required: `sampler` and `temp` may alias: - *addr = sampler 0xff - *temp = (sampler 8) 0xf00 - *addr = addr | temp - */ - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); - brw_OR(p, addr, addr, temp); + /* addr = ((sampler * 0x101) + base_binding_table_index) 0xfff */ + brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 20d096c..1d3f5ed 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -398,10 +398,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_mark_surface_used(prog_data-base, sampler + base_binding_table_index); } else { /* Non-constant sampler index. */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); @@ -409,16 +407,10 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* Some care required: `sampler` and `temp` may alias: - *addr = sampler 0xff - *temp = (sampler 8) 0xf00 - *addr = addr | temp - */ - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); - brw_OR(p, addr, addr, temp); + /* addr = ((sampler * 0x101) + base_binding_table_index) 0xfff */ + brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + brw_AND(p, addr,
Re: [Mesa-dev] [PATCH 1/3] mesa: remove obsolete and broken sampler code
Please ignore the first two patches turns out the name generation is used for structs (although there seems to be no piglit tests for this) like: struct S { sampler2D tex[2]; }; uniform S i[3]; Where name would become: i[2].tex On Fri, 2015-05-29 at 23:16 +1000, Timothy Arceri wrote: The warning is now handled earlier in the ast to hir code, and the name was only generated for arrays of arrays in which case this just breaks the Uniform hash lookup. --- src/mesa/program/sampler.cpp | 38 +- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index ea3024d..34567d2 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -38,14 +38,12 @@ class get_sampler_name : public ir_hierarchical_visitor { public: - get_sampler_name(ir_dereference *last, - struct gl_shader_program *shader_program) + get_sampler_name(struct gl_shader_program *shader_program) { this-mem_ctx = ralloc_context(NULL); this-shader_program = shader_program; this-name = NULL; this-offset = 0; - this-last = last; } ~get_sampler_name() @@ -68,29 +66,20 @@ public: virtual ir_visitor_status visit_leave(ir_dereference_array *ir) { ir_constant *index = ir-array_index-as_constant(); - int i; + + /* GLSL 1.10 and 1.20 allowed variable sampler array indices, + * while GLSL 1.30 requires that the array indices be + * constant integer expressions. We don't expect any driver + * to actually work with a really variable array index, so + * all that would work would be an unrolled loop counter so + * the index must be constant at this point. + */ + assert(index != NULL); if (index) { - i = index-value.i[0]; - } else { - /* GLSL 1.10 and 1.20 allowed variable sampler array indices, - * while GLSL 1.30 requires that the array indices be - * constant integer expressions. We don't expect any driver - * to actually work with a really variable array index, so - * all that would work would be an unrolled loop counter that ends - * up being constant above. - */ - ralloc_strcat(shader_program-InfoLog, -warning: Variable sampler array index unsupported.\n -This feature of the language was removed in GLSL 1.20 -and is unlikely to be supported for 1.10 in Mesa.\n); - i = 0; - } - if (ir != last) { - this-name = ralloc_asprintf(mem_ctx, %s[%d], name, i); - } else { - offset = i; + offset = index-value.i[0]; } + return visit_continue; } @@ -98,7 +87,6 @@ public: const char *name; void *mem_ctx; int offset; - ir_dereference *last; }; @@ -107,7 +95,7 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler, struct gl_shader_program *shader_program, const struct gl_program *prog) { - get_sampler_name getname(sampler, shader_program); + get_sampler_name getname(shader_program); GLuint shader = _mesa_program_enum_to_shader_stage(prog-Target); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Ideas on loop unrolling, loop examples, and my GSoC-blog
On Fri, May 29, 2015 at 6:23 AM, Eero Tamminen eero.t.tammi...@intel.com wrote: Hi, On 05/28/2015 10:19 PM, Thomas Helland wrote: One more thing; Is there a limit where the loop body gets so large that we want to decide that gah, this sucks, no point in unrolling this? I imagine as the loops get large there will be a case of diminishing returns from the unrolling? I think only backend can say something to that. You e.g. give backend unrolled and non-unrolled versions and backend decides which one is better (after applying additional optimizations)... I don't really think it's going to be too good of an idea to do that, mainly because it means you'd be duplicating a lot of work for the normal vs. unrolled versions, and there might be some really large loops where generating the unrolled version is going to kill your CPU -- doing any amount of work that's proportional to the number of times the loop runs, without any limit, seems like a recipe for disaster. In GLSL IR, we've been fairly lax about figuring out when unrolling is helpful and unhelpful -- we just have a simple node count plus a threshold (as well as a few other heuristics). In NIR, we could similarly have an instruction count plus a threshold and port over the heuristics to whatever extent possible. We do have some logic for figuring out if an array access is constant after unrolling, and it seems like we'd want to keep that around. The next level of sophistication, I guess, is to give the backend a callback to give an estimation of the execution cost of certain operations. For example, unless a negate/absolute value instruction is used by something that can't handle the modifier, then on i965 the cost of those instructions would be 0. I think that would get us most of the way there to something accurate, without needing to do an undue amount of work (in terms of CPU time and man-effort). Connor - Eero ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/15] egl: add eglGetSyncAttrib
On Thu 28 May 2015, Marek Olšák wrote: Hi Chad, A new patch is attached. Thanks for the changes. This patch is Reviewed-by: Chad Versace chad.vers...@intel.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] dri_util: make version var unsigned to silence warnings
_mesa_override_gl_version_contextless() takes an unsigned version parameter. --- src/mesa/drivers/dri/common/dri_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 2d847ef..66f9eaa 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -164,7 +164,7 @@ driCreateNewScreen2(int scrn, int fd, struct gl_constants consts = { 0 }; gl_api api; -int version; +unsigned int version; api = API_OPENGLES2; if (_mesa_override_gl_version_contextless(consts, api, version)) -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965: Don't add base_binding_table_index if it's zero
Reviewed-by: Matt Turner matts...@gmail.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 12/15] egl: add eglCreateImage
On Thu 28 May 2015, Eirik Byrkjeflot Anonsen wrote: Marek Olšák mar...@gmail.com writes: I don't understand. Using size_t should prevent the integer overflow. Is there anything else wrong other than no fail path for malloc? I also don't understand how calloc can help here. Marek size * sizeof(int_attribs[0]) may overflow and thus wrap to a small number. Using calloc, you'd have calloc(size, sizeof(int_attribs[0])), moving the overflow inside calloc(). So if calloc() does its job properly, it will protect against it. Right. It's very unlikely that an attacker could coerce the size calculation to overflow, but better safe than sorry. calloc() [and ralloc() too] will refuse to allocate memory if the size calculation overflows. ralloc() checks for overflow with some simple arithmetic. I expect that calloc() checks for overflow using a faster method: multiply first, then inspect the overflow flag in a status register. Recent GCC provides builtin functions for that [1]. [1] https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html#Integer-Overflow-Builtins ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] mesa: remove unused function declaration
On Fri, May 29, 2015 at 6:16 AM, Timothy Arceri t_arc...@yahoo.com.au wrote: --- src/mesa/main/uniforms.h | 4 1 file changed, 4 deletions(-) diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h index 55fa235..bd7b05e 100644 --- a/src/mesa/main/uniforms.h +++ b/src/mesa/main/uniforms.h @@ -343,10 +343,6 @@ void GLAPIENTRY _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); -long -_mesa_parse_program_resource_name(const GLchar *name, - const GLchar **out_base_name_end); - Whoops, looks like it was mistakenly added in commit b92900d2 (which actually did add parse_program_resource_name). Reviewed-by: Matt Turner matts...@gmail.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] dri_util: make version var unsigned to silence warnings
On Fri, May 29, 2015 at 10:38 AM, Brian Paul bri...@vmware.com wrote: _mesa_override_gl_version_contextless() takes an unsigned version parameter. --- src/mesa/drivers/dri/common/dri_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 2d847ef..66f9eaa 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -164,7 +164,7 @@ driCreateNewScreen2(int scrn, int fd, struct gl_constants consts = { 0 }; gl_api api; -int version; +unsigned int version; Maybe drop the 'int' from the declaration. Reviewed-by: Matt Turner matts...@gmail.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Don't use a temporary when generating an indirect sample
On Fri, May 29, 2015 at 6:53 AM, Neil Roberts n...@linux.intel.com wrote: Previously when generating the send instruction for a sample instruction with an indirect sampler it would use the destination register as a temporary store. This breaks when used in combination with the opt_sampler_eot optimisation because that forces the destination to be null. This patch fixes that by avoiding the temp register altogether. The reason the temporary register was needed was because it was trying to ensure the binding table index doesn't overflow a byte by and'ing it with 0xff. The result is then or'd with samper_index8. This patch instead just and's the whole thing by 0xfff. This will ensure that a bogus sampler index won't overflow into the rest of the message descriptor but unlike the previous code it won't ensure that the binding table index doesn't overflow into the sampler index. It doesn't seem like that should matter very much though because if the shader is generating a bogus sampler index then it's going to just get garbage out either way. Instead of doing sampler_index8|(sampler_index+base_table_index) the new code avoids one operation by doing sampler_index*0x101+base_table_index which should be equivalent. However if we wanted to avoid the multiply for some reason we could do this by adding an extra or instruction still without needing the temporary register. This fixes a number of Piglit tests on Skylake that were using indirect samplers such as: spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 17 - src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 0be0f86..ea46b1a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src brw_mark_surface_used(prog_data, sampler + base_binding_table_index); } else { /* Non-const sampler index */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* Some care required: `sampler` and `temp` may alias: - *addr = sampler 0xff - *temp = (sampler 8) 0xf00 - *addr = addr | temp - */ - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); - brw_OR(p, addr, addr, temp); + /* addr = ((sampler * 0x101) + base_binding_table_index) 0xfff */ + brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 20d096c..1d3f5ed 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -398,10 +398,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_mark_surface_used(prog_data-base, sampler + base_binding_table_index); } else { /* Non-constant sampler index. */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); I'd delete the blank line here as well to match the fs code. Really nice solution. I'd been trying to figure out the best way to get an additional temporary in here, but this is clearly better. Reviewed-by: Matt Turner matts...@gmail.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 13/15] egl: add new platform functions
On Thu 28 May 2015, Marek Olšák wrote: A new patch is attached. Please review. Looks good to me. Reviewed-by: Chad Versace chad.vers...@intel.com Later, if and when some platform extension defines a pointer-sized attribute, then we will need to invert the function order. That is, eglGetPlatformDisplayEXT will then need to be a wrapper around eglGetPlatformDisplay. But this patch's approach is fine for now. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 15/15] egl: expose EGL 1.5 if all requirements are met
The last two patches, 14-15, are Reviewed-by: Chad Versace chad.vers...@intel.com I think I've reviewed all the patches in this series. Please let me know if I missed any. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] i965: Create a shader_dispatch_mode enum to replace VS/GS fields.
On Fri, May 29, 2015 at 12:26:39PM -0700, Kenneth Graunke wrote: We used to store the GS dispatch mode in brw_gs_prog_data while separately storing the VS dispatch mode in brw_vue_prog_data::simd8. This patch introduces an enum to represent all possible dispatch modes, and stores it in brw_vue_prog_data::dispatch_mode, unifying the two. Based on a suggestion by Matt Turner. Signed-off-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/drivers/dri/i965/brw_context.h | 16 +++- src/mesa/drivers/dri/i965/brw_defines.h | 5 ++--- src/mesa/drivers/dri/i965/brw_vec4.cpp| 5 - src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 8 src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen7_gs_state.c | 2 +- src/mesa/drivers/dri/i965/gen8_gs_state.c | 3 ++- src/mesa/drivers/dri/i965/gen8_vs_state.c | 3 ++- 8 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index abc11f6..01c4283 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -605,6 +605,12 @@ struct brw_ff_gs_prog_data { unsigned svbi_postincrement_value; }; +enum shader_dispatch_mode { + DISPATCH_MODE_4X1_SINGLE = 0, + DISPATCH_MODE_4X2_DUAL_INSTANCE = 1, + DISPATCH_MODE_4X2_DUAL_OBJECT = 2, + DISPATCH_MODE_SIMD8 = 3, +}; /* Note: brw_vue_prog_data_compare() must be updated when adding fields to * this struct! @@ -622,7 +628,7 @@ struct brw_vue_prog_data { */ GLuint urb_entry_size; - bool simd8; + enum shader_dispatch_mode dispatch_mode; }; @@ -720,14 +726,6 @@ struct brw_gs_prog_data int invocations; /** -* Dispatch mode, can be any of: -* GEN7_GS_DISPATCH_MODE_DUAL_OBJECT -* GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE -* GEN7_GS_DISPATCH_MODE_SINGLE -*/ - int dispatch_mode; - - /** * Gen6 transform feedback enabled flag. */ bool gen6_xfb_enabled; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index dedc381..f6da305 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1773,9 +1773,8 @@ enum brw_message_target { # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1 # define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20 # define GEN7_GS_INSTANCE_CONTROL_SHIFT 15 -# define GEN7_GS_DISPATCH_MODE_SINGLE(0 11) -# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 11) -# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 11) +# define GEN7_GS_DISPATCH_MODE_SHIFT11 +# define GEN7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11) # define GEN6_GS_STATISTICS_ENABLE (1 10) # define GEN6_GS_SO_STATISTICS_ENABLE(1 9) # define GEN6_GS_RENDERING_ENABLE(1 8) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index a324798..5a9c3f5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1894,6 +1894,8 @@ brw_vs_emit(struct brw_context *brw, brw_create_nir(brw, NULL, c-vp-program.Base, MESA_SHADER_VERTEX); } + prog_data-base.dispatch_mode = DISPATCH_MODE_SIMD8; + fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, c-key, prog_data-base.base, prog, c-vp-program.Base, 8); if (!v.run_vs()) { @@ -1926,11 +1928,12 @@ brw_vs_emit(struct brw_context *brw, g.generate_code(v.cfg, 8); assembly = g.get_assembly(final_assembly_size); - prog_data-base.simd8 = true; c-base.last_scratch = v.last_scratch; for whatever it's worth, it would have made more sense to put the dispatch_mode change here. } if (!assembly) { + prog_data-base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; + vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx); if (!v.run()) { if (prog) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 363e30e..eacb2f5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -106,7 +106,7 @@ vec4_gs_visitor::setup_payload() * to be interleaved, so one register contains two attribute slots. */ int attributes_per_reg = - c-prog_data.dispatch_mode == GEN7_GS_DISPATCH_MODE_DUAL_OBJECT ? 1 : 2; + c-prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; /* If a geometry shader tries to read from an input that wasn't written by * the vertex shader, that
Re: [Mesa-dev] [PATCH 6/6] i965/gen8+: Add aux buffer alignment assertions
On Thu 28 May 2015, Ben Widawsky wrote: This helped find the incorrect HALIGN values from the previous patches. Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/gen8_surface_state.c | 4 1 file changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 672fc70..c8965db 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -178,6 +178,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw, if (mt-mcs_mt) { aux_mt = mt-mcs_mt; aux_mode = GEN8_SURFACE_AUX_MODE_MCS; + assert(brw-gen 9 || mt-align_w == 16); + assert(brw-gen 8 || mt-num_samples 0 || mt-align_w == 16); When I saw the gen8 assertion, I was unsure if you were asserting that the miptree satisfied a purely software expectation or that the miptree also satifisfied a hardware requirement. I'd like to see a PRM quote here to make it clear: The Broadwell PRM, RENDER_SURFACE_STATE.SurfaceHorizontalAlignment, says When MCS is enabled for non-MSRT, HALIGN_16 must be used. uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); @@ -391,6 +393,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, if (mt-mcs_mt) { aux_mt = mt-mcs_mt; aux_mode = GEN8_SURFACE_AUX_MODE_MCS; + assert(brw-gen 9 || mt-align_w == 16); + assert(brw-gen 8 || mt-num_samples 0 || mt-align_w == 16); } Same here. With some PRM quotes sprinkled in, this patch is Reviewed-by: Chad Versace chad.vers...@intel.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/6] i965/gen8: Correct HALIGN for AUX surfaces
On Thu 28 May 2015, Ben Widawsky wrote: This restriction was attempted in this commit: commit 47053464630888f819ef8cc44278f1a1220159b9 Author: Anuj Phogat anuj.pho...@gmail.com Date: Fri Feb 13 11:21:21 2015 -0800 i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT However, the commit itself doesn't achieve the desired goal as determined by the asserts which the next patch adds. mcs_mt is never a value because we're in the process of allocating the mcs_mt miptree when we get to this function. I didn't check, but perhaps this would work with blorp, however, meta clears allocate the miptree structure (which AFAICT needs the alignment also) way before it allocates using meta clears where the renderbuffer is allocated way before the aux buffer. The restriction is referenced in a few places, but the most concise one [IMO] from the spec is for Gen9. Gen8 8 loosens the restriction in that it only requires this for non-msrt surface. When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 must be used. With the code before the miptree layout flag rework (patches preceding this), accomplishing this workaround is very difficult. Cc: Anuj Phogat anuj.pho...@gmail.com Cc: Neil Roberts n...@linux.intel.com Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/brw_tex_layout.c| 16 ++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 --- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 4 +++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 72b02a2..b51c7c7 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -41,8 +41,13 @@ static unsigned int intel_horizontal_texture_alignment_unit(struct brw_context *brw, -struct intel_mipmap_tree *mt) +struct intel_mipmap_tree *mt, +uint32_t layout_flags) { + + if (layout_flags MIPTREE_LAYOUT_FORCE_HALIGN16) + return 16; + This snippet makes sense. If the force flag is set, then don't bother inspecting any other state. /** * From the Alignment Unit Size section of various specs, namely: * - Gen3 Spec: Memory Data Formats Volume, Section 1.20.1.4 @@ -91,9 +96,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, if (brw-gen = 7 mt-format == MESA_FORMAT_Z_UNORM16) return 8; - if (brw-gen == 8 mt-mcs_mt mt-num_samples = 1) - return 16; - return 4; } @@ -459,7 +461,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, } void -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) +brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt, + uint32_t layout_flags) { bool multisampled = mt-num_samples 1; bool gen6_hiz_or_stencil = false; @@ -492,8 +495,9 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) mt-align_w = 128 / mt-cpp; mt-align_h = 32; } + assert((layout_flags MIPTREE_LAYOUT_FORCE_HALIGN16) == 0); This assertion makes sense. } else { - mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt); + mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt, layout_flags); mt-align_h = intel_vertical_texture_alignment_unit(brw, mt-format, multisampled); } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 75ee19a..a1ac0cf 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -475,7 +475,10 @@ intel_miptree_create_layout(struct brw_context *brw, if (layout_flags MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD) mt-array_layout = ALL_SLICES_AT_EACH_LOD; - brw_miptree_layout(brw, mt); + if (intel_miptree_is_fast_clear_capable(brw, mt)) + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + + brw_miptree_layout(brw, mt, layout_flags); This does not make sense to me. I though HALIGN16 didn't exist before Skylake, but here you're setting it on Ivybridge through Broadwell too. Am I misunderstanding something? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] i965: Drop Vector Mask Enable bit from 3DSTATE_GS on Gen8+.
On Fri, May 29, 2015 at 12:26:38PM -0700, Kenneth Graunke wrote: The documentation makes it pretty clear that we shouldn't use this: Under normal conditions SW shall specify DMask, as the GS stage will provide a Dispatch Mask appropriate to SIMD4x2 or SIMD8 thread execution (as a function of dispatch mode). E.g., for SIMD4x2 execution, the GS stage will generate a Dispatch Mask that is equal to what the EU would use as the Vector Mask. For SIMD8 execution there is no known usage model for use of Vector Mask (as there is for PS shaders). I also managed to find descriptions of DMask and VMask, in the State Register (sr0.2/3) field descriptions: Dispatch Mask (DMask). This 32-bit field specifies which channels are active at Dispatch time. Vector Mask (VMask). This 32-bit field contains, for each 4-bit group, the OR of the corresponding 4-bit group in the dispatch mask. SIMD4x2 shaders process one or two vec4 values, with each 4-bit group corresponding to xyzw channel enables (either all on, or all off). Thus, DMask = VMask in SIMD4x2 mode. But in SIMD8 mode, 4-bit groups are meaningless, so it just messes up your values. Just making sure you're reading this the same way I am, DMask == VMask for SIMD4x2. VMask shouldn't be used for SIMD8. While here, you've probably seen this before, but I just noticed for SIMD8 GS on BDW: Not valid for objects with more than 6 vertices per object. Probably we don't care, but SPF might be effected by this patch, broken, or fixed by this - I can't quite tell which. This seems fine to me though. Reviewed-by: Ben Widawsky b...@bwidawsk.net Signed-off-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/drivers/dri/i965/gen8_gs_state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) I'm working on SIMD8 geometry shader support. These are a few simple patches that are pretty straightforward and could be landed on their own. diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 6a0e215..0763e91 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -48,8 +48,7 @@ gen8_upload_gs_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_GS 16 | (10 - 2)); OUT_BATCH(stage_state-prog_offset); OUT_BATCH(0); - OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE | -brw-geometry_program-VerticesIn | + OUT_BATCH(brw-geometry_program-VerticesIn | ((ALIGN(stage_state-sampler_count, 4)/4) GEN6_GS_SAMPLER_COUNT_SHIFT) | ((prog_data-base.binding_table.size_bytes / 4) -- Ben Widawsky, Intel Open Source Technology Center ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] enable pbuffer for drm platform
When we tried to enable EGL on our project, we found that there was no pbuffer support for drm platform. I got some help from Alan and Kristian. Thank you so much! I studied platform_drm.c and compared it to other platforms' code. Finally I was able to create this patch to enable pbuffer for drm platform. Our QA team has run full cycle testing and didn't find any regression. I wonder if anybody could review this patch to see if we could submit it to mesa. Thanks Ying --- mesa-10.5.5.orig/src/egl/drivers/dri2/egl_dri2.c 2015-05-11 12:10:59.0 -0700 +++ mesa-10.5.5/src/egl/drivers/dri2/egl_dri2.c 2015-05-26 04:06:16.353346946 -0700 @@ -869,6 +869,10 @@ dri2_create_context(_EGLDriver *drv, _EG */ if (conf-SurfaceType EGL_WINDOW_BIT) dri2_ctx-base.WindowRenderBuffer = EGL_BACK_BUFFER; + + if (conf-SurfaceType EGL_PBUFFER_BIT) + dri2_ctx-base.WindowRenderBuffer = EGL_BACK_BUFFER; + } else dri_config = NULL; diff -rupN mesa-10.5.5.orig/src/egl/drivers/dri2/platform_drm.c mesa-10.5.5/src/egl/drivers/dri2/platform_drm.c --- mesa-10.5.5.orig/src/egl/drivers/dri2/platform_drm.c2015-05-11 12:10:59.0 -0700 +++ mesa-10.5.5/src/egl/drivers/dri2/platform_drm.c 2015-05-26 04:04:25.293346505 -0700 @@ -122,6 +122,8 @@ dri2_drm_create_surface(_EGLDriver *drv, dri2_surf-base.Height = surf-base.height; surf-dri_private = dri2_surf; break; + case EGL_PBUFFER_BIT: + break; default: goto cleanup_surf; } @@ -130,7 +132,7 @@ dri2_drm_create_surface(_EGLDriver *drv, dri2_surf-dri_drawable = (*dri2_dpy-dri2-createNewDrawable) (dri2_dpy-dri_screen, dri2_conf-dri_double_config, - dri2_surf-gbm_surf); + dri2_surf); } else { assert(dri2_dpy-swrast != NULL); @@ -153,6 +155,15 @@ dri2_drm_create_surface(_EGLDriver *drv, return NULL; } +static inline _EGLSurface * +dri2_drm_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *disp, + _EGLConfig *conf, + const EGLint *attrib_list) +{ + return dri2_drm_create_surface(drv, disp, EGL_PBUFFER_BIT, conf, + NULL, attrib_list); +} + static _EGLSurface * dri2_drm_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, void *native_window, @@ -575,7 +586,7 @@ static struct dri2_egl_display_vtbl dri2 .authenticate = dri2_drm_authenticate, .create_window_surface = dri2_drm_create_window_surface, .create_pixmap_surface = dri2_drm_create_pixmap_surface, - .create_pbuffer_surface = dri2_fallback_create_pbuffer_surface, + .create_pbuffer_surface = dri2_drm_create_pbuffer_surface, .destroy_surface = dri2_drm_destroy_surface, .create_image = dri2_drm_create_image_khr, .swap_interval = dri2_fallback_swap_interval, @@ -596,6 +607,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EG struct gbm_device *gbm; int fd = -1; int i; + EGLint surface_type; loader_set_logger(_eglLog); @@ -691,8 +703,9 @@ dri2_initialize_drm(_EGLDriver *drv, _EG attr_list[1] = format; attr_list[2] = EGL_NONE; + surface_type = EGL_WINDOW_BIT | EGL_PBUFFER_BIT; dri2_add_config(disp, dri2_dpy-driver_configs[i], - i + 1, EGL_WINDOW_BIT, attr_list, NULL); + i + 1, surface_type, attr_list, NULL); } disp-Extensions.KHR_image_pixmap = EGL_TRUE; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] enable pbuffer for drm platform
On Fri, May 29, 2015 at 3:17 PM, Liu, Ying2 ying2@intel.com wrote: When we tried to enable EGL on our project, we found that there was no pbuffer support for drm platform. I got some help from Alan and Kristian. Thank you so much! I studied platform_drm.c and compared it to other platforms’ code. Finally I was able to create this patch to enable pbuffer for drm platform. Our QA team has run full cycle testing and didn’t find any regression. I wonder if anybody could review this patch to see if we could submit it to mesa. Thanks Ying Thanks for the patch. I don't know much about EGL or pbuffers, but I can offer some advice on patch submission. Firstly, the patch cannot be applied directly, since it's been line wrapped by your email client. The best way to send patches is with `git send-email` which will do all the proper formatting to ensure it can be easily applied. Also, patches should have an appropriate title and commit summary. The title is of the form prefix: Imperative sentence, like egl: Add pbuffer support for drm platform. Perhaps a small description of why this is useful would be nice in the commit summary as well. --- mesa-10.5.5.orig/src/egl/drivers/dri2/egl_dri2.c 2015-05-11 12:10:59.0 -0700 +++ mesa-10.5.5/src/egl/drivers/dri2/egl_dri2.c 2015-05-26 04:06:16.353346946 -0700 @@ -869,6 +869,10 @@ dri2_create_context(_EGLDriver *drv, _EG */ if (conf-SurfaceType EGL_WINDOW_BIT) dri2_ctx-base.WindowRenderBuffer = EGL_BACK_BUFFER; + + if (conf-SurfaceType EGL_PBUFFER_BIT) + dri2_ctx-base.WindowRenderBuffer = EGL_BACK_BUFFER; The indentation on this line is different from the one in the EGL_WINDOW_BIT case above. Keep it consistent with the surrounding code. + } else dri_config = NULL; diff -rupN mesa-10.5.5.orig/src/egl/drivers/dri2/platform_drm.c mesa-10.5.5/src/egl/drivers/dri2/platform_drm.c --- mesa-10.5.5.orig/src/egl/drivers/dri2/platform_drm.c2015-05-11 12:10:59.0 -0700 +++ mesa-10.5.5/src/egl/drivers/dri2/platform_drm.c 2015-05-26 04:04:25.293346505 -0700 @@ -122,6 +122,8 @@ dri2_drm_create_surface(_EGLDriver *drv, dri2_surf-base.Height = surf-base.height; surf-dri_private = dri2_surf; break; + case EGL_PBUFFER_BIT: + break; default: goto cleanup_surf; } @@ -130,7 +132,7 @@ dri2_drm_create_surface(_EGLDriver *drv, dri2_surf-dri_drawable = (*dri2_dpy-dri2-createNewDrawable) (dri2_dpy-dri_screen, dri2_conf-dri_double_config, - dri2_surf-gbm_surf); + dri2_surf); I'm not familiar with the code, but this change seems suspect... not only because immediately below this is another call to createNewDrawable that does not contain the same change. } else { assert(dri2_dpy-swrast != NULL); @@ -153,6 +155,15 @@ dri2_drm_create_surface(_EGLDriver *drv, return NULL; } +static inline _EGLSurface * Don't use inline unless you know better than the compiler. In this case it's particularly unhelpful, since the function is used in a vtable... so it cannot be inline. +dri2_drm_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *disp, + _EGLConfig *conf, + const EGLint *attrib_list) The style for line-wrapped argument lists is to align them with the opening ( See dri2_drm_create_window_surface for example. +{ + return dri2_drm_create_surface(drv, disp, EGL_PBUFFER_BIT, conf, + NULL, attrib_list); The same comment applies here as well. +} + static _EGLSurface * dri2_drm_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, void *native_window, @@ -575,7 +586,7 @@ static struct dri2_egl_display_vtbl dri2 .authenticate = dri2_drm_authenticate, .create_window_surface = dri2_drm_create_window_surface, .create_pixmap_surface = dri2_drm_create_pixmap_surface, - .create_pbuffer_surface = dri2_fallback_create_pbuffer_surface, + .create_pbuffer_surface = dri2_drm_create_pbuffer_surface, .destroy_surface = dri2_drm_destroy_surface, .create_image = dri2_drm_create_image_khr, .swap_interval = dri2_fallback_swap_interval, @@ -596,6 +607,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EG struct gbm_device *gbm; int fd = -1; int i; + EGLint surface_type; loader_set_logger(_eglLog); @@ -691,8 +703,9 @@ dri2_initialize_drm(_EGLDriver *drv, _EG attr_list[1] = format; attr_list[2] = EGL_NONE; + surface_type = EGL_WINDOW_BIT | EGL_PBUFFER_BIT; dri2_add_config(disp, dri2_dpy-driver_configs[i], - i + 1, EGL_WINDOW_BIT, attr_list, NULL); +
Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags
On Fri 29 May 2015, Kenneth Graunke wrote: On Friday, May 29, 2015 12:33:10 PM Chad Versace wrote: On Fri 29 May 2015, Matt Turner wrote: On Thu, May 28, 2015 at 10:21 AM, Ben Widawsky @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt-logical_height0 = height0; mt-logical_depth0 = depth0; mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; - mt-disable_aux_buffers = disable_aux_buffers; + mt-disable_aux_buffers = !!(layout_flags MIPTREE_LAYOUT_DISABLE_AUX); FWIW, I much prefer (x y) != 0 to !!(x y). Matt, in the C code you've encountered in the wild, do you feel that `(x y) != 0` is more prevalent than `!!(x y)`? I'm curious, because we should probably choose the idiom which is more recognizable. For the record, I slightly prefer !! because I've encountered it often in idiomatic Python, but it really doesn't matter to me. I suspect that != 0 may be the more common idiom in C. I prefer != 0 as well. I'm convinced that != 0 is the right choice here, despite my personal preference. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] i965: Use proper pitch for scalar GS pull constants and UBOs.
On Fri, May 29, 2015 at 12:26:40PM -0700, Kenneth Graunke wrote: See the corresponding code in brw_vs_surface_state.c. Signed-off-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c index a323e4d..bfc4516 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c @@ -47,11 +47,12 @@ brw_upload_gs_pull_constants(struct brw_context *brw) return; /* BRW_NEW_GS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw-gs.prog_data-base.base; + struct brw_vue_prog_data *prog_data = brw-gs.prog_data-base; + bool dword_pitch = prog_data-dispatch_mode == DISPATCH_MODE_SIMD8; /* _NEW_PROGRAM_CONSTANTS */ brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, gp-program.Base, - stage_state, prog_data, false); + stage_state, prog_data-base, dword_pitch); } const struct brw_tracked_state brw_gs_pull_constants = { @@ -77,8 +78,11 @@ brw_upload_gs_ubo_surfaces(struct brw_context *brw) return; /* BRW_NEW_GS_PROG_DATA */ + struct brw_vue_prog_data *prog_data = brw-gs.prog_data-base; + bool dword_pitch = prog_data-dispatch_mode == DISPATCH_MODE_SIMD8; + brw_upload_ubo_surfaces(brw, prog-_LinkedShaders[MESA_SHADER_GEOMETRY], -brw-gs.base, brw-gs.prog_data-base.base, false); +brw-gs.base, prog_data-base, dword_pitch); } const struct brw_tracked_state brw_gs_ubo_surfaces = { See comment in the previous patch... but as you like: Reviewed-by: Ben Widawsky b...@bwidawsk.net -- Ben Widawsky, Intel Open Source Technology Center ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/6] i965/gen8: Correct HALIGN for AUX surfaces
On Fri 29 May 2015, Chad Versace wrote: On Thu 28 May 2015, Ben Widawsky wrote: This restriction was attempted in this commit: commit 47053464630888f819ef8cc44278f1a1220159b9 Author: Anuj Phogat anuj.pho...@gmail.com Date: Fri Feb 13 11:21:21 2015 -0800 i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT However, the commit itself doesn't achieve the desired goal as determined by the asserts which the next patch adds. mcs_mt is never a value because we're in the process of allocating the mcs_mt miptree when we get to this function. I didn't check, but perhaps this would work with blorp, however, meta clears allocate the miptree structure (which AFAICT needs the alignment also) way before it allocates using meta clears where the renderbuffer is allocated way before the aux buffer. The restriction is referenced in a few places, but the most concise one [IMO] from the spec is for Gen9. Gen8 8 loosens the restriction in that it only requires this for non-msrt surface. When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 must be used. With the code before the miptree layout flag rework (patches preceding this), accomplishing this workaround is very difficult. Cc: Anuj Phogat anuj.pho...@gmail.com Cc: Neil Roberts n...@linux.intel.com Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/brw_tex_layout.c| 16 ++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 --- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 4 +++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 72b02a2..b51c7c7 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -41,8 +41,13 @@ static unsigned int intel_horizontal_texture_alignment_unit(struct brw_context *brw, -struct intel_mipmap_tree *mt) +struct intel_mipmap_tree *mt, +uint32_t layout_flags) { + + if (layout_flags MIPTREE_LAYOUT_FORCE_HALIGN16) + return 16; + This snippet makes sense. If the force flag is set, then don't bother inspecting any other state. /** * From the Alignment Unit Size section of various specs, namely: * - Gen3 Spec: Memory Data Formats Volume, Section 1.20.1.4 @@ -91,9 +96,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, if (brw-gen = 7 mt-format == MESA_FORMAT_Z_UNORM16) return 8; - if (brw-gen == 8 mt-mcs_mt mt-num_samples = 1) - return 16; - return 4; } @@ -459,7 +461,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, } void -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) +brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt, + uint32_t layout_flags) { bool multisampled = mt-num_samples 1; bool gen6_hiz_or_stencil = false; @@ -492,8 +495,9 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) mt-align_w = 128 / mt-cpp; mt-align_h = 32; } + assert((layout_flags MIPTREE_LAYOUT_FORCE_HALIGN16) == 0); This assertion makes sense. } else { - mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt); + mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt, layout_flags); mt-align_h = intel_vertical_texture_alignment_unit(brw, mt-format, multisampled); } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 75ee19a..a1ac0cf 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -475,7 +475,10 @@ intel_miptree_create_layout(struct brw_context *brw, if (layout_flags MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD) mt-array_layout = ALL_SLICES_AT_EACH_LOD; - brw_miptree_layout(brw, mt); + if (intel_miptree_is_fast_clear_capable(brw, mt)) + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + + brw_miptree_layout(brw, mt, layout_flags); This does not make sense to me. I though HALIGN16 didn't exist before Skylake, but here you're setting it on Ivybridge through Broadwell too. Am I misunderstanding something? I did some homewwork. HALIGN16 was introduced in Broadwell and does not exist in Haswell. So this code still looks wrong to me. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/6] i965/gen9: Set HALIGN_16 for all aux buffers
On Thu 28 May 2015, Ben Widawsky wrote: Just like the previous patch, but for the GEN9 constraints. Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index a1ac0cf..89030aa 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -655,6 +655,11 @@ intel_miptree_create(struct brw_context *brw, assert((layout_flags MIPTREE_LAYOUT_DISABLE_AUX) == 0); assert((layout_flags MIPTREE_LAYOUT_FOR_BO) == 0); + + if (brw-gen = 9 num_samples 1) + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + The commit subject says this change applies to all aux buffers, and does not mention non-aux buffers. But the patch applies HALIGN16 to a large set of non-aux buffers: all multisample buffers. Should the patch's code be fixed to agree with the commit subject, or the converse? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags
On Friday, May 29, 2015 12:33:10 PM Chad Versace wrote: On Fri 29 May 2015, Matt Turner wrote: On Thu, May 28, 2015 at 10:21 AM, Ben Widawsky @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt-logical_height0 = height0; mt-logical_depth0 = depth0; mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; - mt-disable_aux_buffers = disable_aux_buffers; + mt-disable_aux_buffers = !!(layout_flags MIPTREE_LAYOUT_DISABLE_AUX); FWIW, I much prefer (x y) != 0 to !!(x y). Matt, in the C code you've encountered in the wild, do you feel that `(x y) != 0` is more prevalent than `!!(x y)`? I'm curious, because we should probably choose the idiom which is more recognizable. For the record, I slightly prefer !! because I've encountered it often in idiomatic Python, but it really doesn't matter to me. I suspect that != 0 may be the more common idiom in C. I prefer != 0 as well. signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags
On Fri 29 May 2015, Pohjolainen, Topi wrote: On Fri, May 29, 2015 at 09:32:53AM +0300, Pohjolainen, Topi wrote: On Thu, May 28, 2015 at 10:21:29AM -0700, Ben Widawsky wrote: I think pretty much everyone agrees that having more than a single bool as a function argument is bordering on a bad idea. What sucks about the current code is in several instances it's necessary to propagate these boolean selections down to lower layers of the code. This requires plumbing (mechanical, but still churn) pretty much all of the miptree functions each time. By introducing the flags paramater, it is possible to add miptree constraints very easily. I like this a lot. I have a few simple comments below. Me too. This is a good cleanup. diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index aebed72..1b3a72f 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, image-height, 1, image-pitch, - true /*disable_aux_buffers*/); + MIPTREE_LAYOUT_DISABLE_AUX); if (!irb-mt) return; @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, intel_image-base.Base.Level, intel_image-base.Base.Level, width, height, depth, - true, irb-mt-num_samples, INTEL_MIPTREE_TILING_ANY, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); if (intel_miptree_wants_hiz_buffer(brw, new_mt)) { intel_miptree_alloc_hiz(brw, new_mt); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 24a5c3d..b243f8a 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw, GLuint width0, GLuint height0, GLuint depth0, -bool for_bo, GLuint num_samples, -bool force_all_slices_at_each_lod, -bool disable_aux_buffers) +uint32_t layout_flags) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); if (!mt) @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt-logical_height0 = height0; mt-logical_depth0 = depth0; mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; - mt-disable_aux_buffers = disable_aux_buffers; + mt-disable_aux_buffers = !!(layout_flags MIPTREE_LAYOUT_DISABLE_AUX); You didn't mean to have double negation (!!), did you? I actually meant that isn't layout_flags MIPTREE_LAYOUT_DISABLE_AUX enough on its own? I think `layout_flags MIPTREE_LAYOUT_DISABLE_AUX` is sufficient solely because mt-disable_aux_buffers has type bool. I prefer to keep Ben's original `!!`, though`, because it feels more future-proof against future code changes. @@ -422,12 +420,14 @@ intel_miptree_create_layout(struct brw_context *brw, mt-physical_height0 = height0; mt-physical_depth0 = depth0; - if (!for_bo + if (!(layout_flags MIPTREE_LAYOUT_FOR_BO) _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL (brw-must_use_separate_stencil || (brw-has_separate_stencil intel_miptree_wants_hiz_buffer(brw, mt { - const bool force_all_slices_at_each_lod = brw-gen == 6; + uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; + if (brw-gen == 6) + stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD; Perhaps a separating line here, your choice. +1 @@ -527,6 +527,10 @@ bool intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt); +#define MIPTREE_LAYOUT_ACCELERATED_UPLOAD (10) I was wondering if we could call the the flags something else than layout. The manner of upload doesn't technically have much to do with layout. Maybe just flags, shrug. I have nitpick too :) I'd like to see these defined in an anonymous enum block so the token names show up in gdb. enum { MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD = 1 1, ..., }; I agree with
Re: [Mesa-dev] [PATCH 1/2] i965: Don't use a temporary when generating an indirect sample
This thing has been trouble since I wrote it. Nice to see it go. Both patches are: Reviewed-by: Chris Forbes chr...@ijw.co.nz On May 30, 2015 6:28 AM, Matt Turner matts...@gmail.com wrote: On Fri, May 29, 2015 at 6:53 AM, Neil Roberts n...@linux.intel.com wrote: Previously when generating the send instruction for a sample instruction with an indirect sampler it would use the destination register as a temporary store. This breaks when used in combination with the opt_sampler_eot optimisation because that forces the destination to be null. This patch fixes that by avoiding the temp register altogether. The reason the temporary register was needed was because it was trying to ensure the binding table index doesn't overflow a byte by and'ing it with 0xff. The result is then or'd with samper_index8. This patch instead just and's the whole thing by 0xfff. This will ensure that a bogus sampler index won't overflow into the rest of the message descriptor but unlike the previous code it won't ensure that the binding table index doesn't overflow into the sampler index. It doesn't seem like that should matter very much though because if the shader is generating a bogus sampler index then it's going to just get garbage out either way. Instead of doing sampler_index8|(sampler_index+base_table_index) the new code avoids one operation by doing sampler_index*0x101+base_table_index which should be equivalent. However if we wanted to avoid the multiply for some reason we could do this by adding an extra or instruction still without needing the temporary register. This fixes a number of Piglit tests on Skylake that were using indirect samplers such as: spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 17 - src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 0be0f86..ea46b1a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src brw_mark_surface_used(prog_data, sampler + base_binding_table_index); } else { /* Non-const sampler index */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* Some care required: `sampler` and `temp` may alias: - *addr = sampler 0xff - *temp = (sampler 8) 0xf00 - *addr = addr | temp - */ - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); - brw_OR(p, addr, addr, temp); + /* addr = ((sampler * 0x101) + base_binding_table_index) 0xfff */ + brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 20d096c..1d3f5ed 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -398,10 +398,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_mark_surface_used(prog_data-base, sampler + base_binding_table_index); } else { /* Non-constant sampler index. */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); I'd delete the blank line here as well to match the fs code. Really nice solution. I'd been trying to figure out the best way to get an additional temporary in here, but this is clearly better. Reviewed-by: Matt Turner matts...@gmail.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org
[Mesa-dev] [PATCH 3/3] i965: Use proper pitch for scalar GS pull constants and UBOs.
See the corresponding code in brw_vs_surface_state.c. Signed-off-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c index a323e4d..bfc4516 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c @@ -47,11 +47,12 @@ brw_upload_gs_pull_constants(struct brw_context *brw) return; /* BRW_NEW_GS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw-gs.prog_data-base.base; + struct brw_vue_prog_data *prog_data = brw-gs.prog_data-base; + bool dword_pitch = prog_data-dispatch_mode == DISPATCH_MODE_SIMD8; /* _NEW_PROGRAM_CONSTANTS */ brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, gp-program.Base, - stage_state, prog_data, false); + stage_state, prog_data-base, dword_pitch); } const struct brw_tracked_state brw_gs_pull_constants = { @@ -77,8 +78,11 @@ brw_upload_gs_ubo_surfaces(struct brw_context *brw) return; /* BRW_NEW_GS_PROG_DATA */ + struct brw_vue_prog_data *prog_data = brw-gs.prog_data-base; + bool dword_pitch = prog_data-dispatch_mode == DISPATCH_MODE_SIMD8; + brw_upload_ubo_surfaces(brw, prog-_LinkedShaders[MESA_SHADER_GEOMETRY], - brw-gs.base, brw-gs.prog_data-base.base, false); + brw-gs.base, prog_data-base, dword_pitch); } const struct brw_tracked_state brw_gs_ubo_surfaces = { -- 2.4.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] i965: Create a shader_dispatch_mode enum to replace VS/GS fields.
We used to store the GS dispatch mode in brw_gs_prog_data while separately storing the VS dispatch mode in brw_vue_prog_data::simd8. This patch introduces an enum to represent all possible dispatch modes, and stores it in brw_vue_prog_data::dispatch_mode, unifying the two. Based on a suggestion by Matt Turner. Signed-off-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/drivers/dri/i965/brw_context.h | 16 +++- src/mesa/drivers/dri/i965/brw_defines.h | 5 ++--- src/mesa/drivers/dri/i965/brw_vec4.cpp| 5 - src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 8 src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen7_gs_state.c | 2 +- src/mesa/drivers/dri/i965/gen8_gs_state.c | 3 ++- src/mesa/drivers/dri/i965/gen8_vs_state.c | 3 ++- 8 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index abc11f6..01c4283 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -605,6 +605,12 @@ struct brw_ff_gs_prog_data { unsigned svbi_postincrement_value; }; +enum shader_dispatch_mode { + DISPATCH_MODE_4X1_SINGLE = 0, + DISPATCH_MODE_4X2_DUAL_INSTANCE = 1, + DISPATCH_MODE_4X2_DUAL_OBJECT = 2, + DISPATCH_MODE_SIMD8 = 3, +}; /* Note: brw_vue_prog_data_compare() must be updated when adding fields to * this struct! @@ -622,7 +628,7 @@ struct brw_vue_prog_data { */ GLuint urb_entry_size; - bool simd8; + enum shader_dispatch_mode dispatch_mode; }; @@ -720,14 +726,6 @@ struct brw_gs_prog_data int invocations; /** -* Dispatch mode, can be any of: -* GEN7_GS_DISPATCH_MODE_DUAL_OBJECT -* GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE -* GEN7_GS_DISPATCH_MODE_SINGLE -*/ - int dispatch_mode; - - /** * Gen6 transform feedback enabled flag. */ bool gen6_xfb_enabled; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index dedc381..f6da305 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1773,9 +1773,8 @@ enum brw_message_target { # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1 # define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT20 # define GEN7_GS_INSTANCE_CONTROL_SHIFT15 -# define GEN7_GS_DISPATCH_MODE_SINGLE (0 11) -# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 11) -# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 11) +# define GEN7_GS_DISPATCH_MODE_SHIFT11 +# define GEN7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11) # define GEN6_GS_STATISTICS_ENABLE (1 10) # define GEN6_GS_SO_STATISTICS_ENABLE (1 9) # define GEN6_GS_RENDERING_ENABLE (1 8) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index a324798..5a9c3f5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1894,6 +1894,8 @@ brw_vs_emit(struct brw_context *brw, brw_create_nir(brw, NULL, c-vp-program.Base, MESA_SHADER_VERTEX); } + prog_data-base.dispatch_mode = DISPATCH_MODE_SIMD8; + fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, c-key, prog_data-base.base, prog, c-vp-program.Base, 8); if (!v.run_vs()) { @@ -1926,11 +1928,12 @@ brw_vs_emit(struct brw_context *brw, g.generate_code(v.cfg, 8); assembly = g.get_assembly(final_assembly_size); - prog_data-base.simd8 = true; c-base.last_scratch = v.last_scratch; } if (!assembly) { + prog_data-base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; + vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx); if (!v.run()) { if (prog) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 363e30e..eacb2f5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -106,7 +106,7 @@ vec4_gs_visitor::setup_payload() * to be interleaved, so one register contains two attribute slots. */ int attributes_per_reg = - c-prog_data.dispatch_mode == GEN7_GS_DISPATCH_MODE_DUAL_OBJECT ? 1 : 2; + c-prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; /* If a geometry shader tries to read from an input that wasn't written by * the vertex shader, that produces undefined results, but it shouldn't @@ -655,7 +655,7 @@ brw_gs_emit(struct brw_context *brw, */ if (c-prog_data.invocations = 1 likely(!(INTEL_DEBUG DEBUG_NO_DUAL_OBJECT_GS))) { - c-prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_OBJECT; +
[Mesa-dev] [PATCH 1/3] i965: Drop Vector Mask Enable bit from 3DSTATE_GS on Gen8+.
The documentation makes it pretty clear that we shouldn't use this: Under normal conditions SW shall specify DMask, as the GS stage will provide a Dispatch Mask appropriate to SIMD4x2 or SIMD8 thread execution (as a function of dispatch mode). E.g., for SIMD4x2 execution, the GS stage will generate a Dispatch Mask that is equal to what the EU would use as the Vector Mask. For SIMD8 execution there is no known usage model for use of Vector Mask (as there is for PS shaders). I also managed to find descriptions of DMask and VMask, in the State Register (sr0.2/3) field descriptions: Dispatch Mask (DMask). This 32-bit field specifies which channels are active at Dispatch time. Vector Mask (VMask). This 32-bit field contains, for each 4-bit group, the OR of the corresponding 4-bit group in the dispatch mask. SIMD4x2 shaders process one or two vec4 values, with each 4-bit group corresponding to xyzw channel enables (either all on, or all off). Thus, DMask = VMask in SIMD4x2 mode. But in SIMD8 mode, 4-bit groups are meaningless, so it just messes up your values. Signed-off-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/drivers/dri/i965/gen8_gs_state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) I'm working on SIMD8 geometry shader support. These are a few simple patches that are pretty straightforward and could be landed on their own. diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 6a0e215..0763e91 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -48,8 +48,7 @@ gen8_upload_gs_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_GS 16 | (10 - 2)); OUT_BATCH(stage_state-prog_offset); OUT_BATCH(0); - OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE | -brw-geometry_program-VerticesIn | + OUT_BATCH(brw-geometry_program-VerticesIn | ((ALIGN(stage_state-sampler_count, 4)/4) GEN6_GS_SAMPLER_COUNT_SHIFT) | ((prog_data-base.binding_table.size_bytes / 4) -- 2.4.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/6] i965/gen9: Only allow Y-Tiled MCS buffers
On Thu 28 May 2015, Ben Widawsky wrote: For GEN9, much of the logic to use X-Tiled buffers has been stripped out. It is still supported in some places, but it's never desirable. Unfortunately we don't yet have the ability to have Y-Tiled scanout (see: http://patchwork.freedesktop.org/patch/46984/), NOTE: This patch shouldn't actually do anything since SKL doesn't yet use fast clears, and that's the only case we can get to this function (by way of intel_update_winsys_renderbuffer_miptree) Signed-off-by: Ben Widawsky b...@bwidawsk.net Reviewed-by: Chad Versace chad.vers...@intel.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags
On Thu, May 28, 2015 at 10:21 AM, Ben Widawsky benjamin.widaw...@intel.com wrote: I think pretty much everyone agrees that having more than a single bool as a function argument is bordering on a bad idea. What sucks about the current code is in several instances it's necessary to propagate these boolean selections down to lower layers of the code. This requires plumbing (mechanical, but still churn) pretty much all of the miptree functions each time. By introducing the flags paramater, it is possible to add miptree constraints very easily. The use of this, as is already the case, is sometimes we have some information at the time we create the miptree that needs to be known all the way at the lowest levels of the create/allocation, disable_aux_buffers is currently one such example. There will be another example coming up in a few patches. Cc: Chad Versace chad.vers...@linux.intel.com Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/intel_fbo.c | 5 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 86 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 9 ++- src/mesa/drivers/dri/i965/intel_pixel_draw.c | 2 +- src/mesa/drivers/dri/i965/intel_tex.c | 8 +-- src/mesa/drivers/dri/i965/intel_tex.h | 2 +- src/mesa/drivers/dri/i965/intel_tex_image.c| 14 ++--- src/mesa/drivers/dri/i965/intel_tex_validate.c | 3 +- 8 files changed, 63 insertions(+), 66 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index aebed72..1b3a72f 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, image-height, 1, image-pitch, - true /*disable_aux_buffers*/); + MIPTREE_LAYOUT_DISABLE_AUX); if (!irb-mt) return; @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, intel_image-base.Base.Level, intel_image-base.Base.Level, width, height, depth, - true, irb-mt-num_samples, INTEL_MIPTREE_TILING_ANY, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); if (intel_miptree_wants_hiz_buffer(brw, new_mt)) { intel_miptree_alloc_hiz(brw, new_mt); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 24a5c3d..b243f8a 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw, GLuint width0, GLuint height0, GLuint depth0, -bool for_bo, GLuint num_samples, -bool force_all_slices_at_each_lod, -bool disable_aux_buffers) +uint32_t layout_flags) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); if (!mt) @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt-logical_height0 = height0; mt-logical_depth0 = depth0; mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; - mt-disable_aux_buffers = disable_aux_buffers; + mt-disable_aux_buffers = !!(layout_flags MIPTREE_LAYOUT_DISABLE_AUX); FWIW, I much prefer (x y) != 0 to !!(x y). ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 15/15] egl: expose EGL 1.5 if all requirements are met
Yes, everything is reviewed. Marek On Fri, May 29, 2015 at 8:40 PM, Chad Versace chad.vers...@intel.com wrote: The last two patches, 14-15, are Reviewed-by: Chad Versace chad.vers...@intel.com I think I've reviewed all the patches in this series. Please let me know if I missed any. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/6] i965: Extract tiling from fast clear decision
On Thu 28 May 2015, Ben Widawsky wrote: There are several constraints when determining if one can fast clear a surface. Some of these are alignment, pixel density, tiling formats, and others that vary by generation. The helper function which exists today does a suitable job, however it conflates BO properties with Miptree properties when using tiling. I consider the former to be attributes of the physical surface, things which are determined through BO allocation, and the latter being attributes which are derived from the API, and having nothing to do with the underlying surface. Tiling properties are a distinct operation from the creation of a miptree, and by removing this, we gain flexibility throughout the code to make determinations about when we can or cannot fast clear strictly on the miptree. I don't understand the above sentence. Tiling properties cannot be a distinct operation, because tiling properties is not an operation at all. I'm not being a grammar jerk. I'm just dense and don't understand this paragraph's claims. To signify this change, I've also renamed the function to indicate it is a distinction made on the miptree. I am torn as to whether or not it was a good idea to remove non_msrt since it's a really nice thing for grep. Cc: Chad Versace chad.vers...@linux.intel.com Signed-off-by: Ben Widawsky b...@bwidawsk.net --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 37 +++ src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 11 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 68d405c..75ee19a 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -158,15 +158,33 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw, } } +/** + * Determine the BO backing the miptree has a suitable tile format. For Gen7, + * and 8 this means any tiled format. Hmm... a suitable tile format for what? How about this rephrasing? Does the hardware support non-MSRT for this tiling format + * + * From the Ivy Bridge PRM, Vol2 Part1 11.7 MCS Buffer for Render Target(s), + * beneath the Fast Color Clear bullet (p326): + * + * - Support is limited to tiled render targets. + */ +bool +intel_is_non_msrt_mcs_tile_supported(struct brw_context *brw, + unsigned tiling) The function name is misleading. It hints that there exists such a thing as an MCS tile. Perhaps rename it to intel_tiling_supports_non_msrt_mcs? +{ + if (brw-gen = 9) + return tiling == I915_TILING_Y; + + return tiling != I915_TILING_NONE; To be complete, the if-ladder should be... if (brw-gen = 9) { return tiling == I915_TILIING_Y; } else if (brw-gen = 7) { return tiling != I915_TILING_NONE; } else { return false; } ...because gen6 and below do not support MCS (iirc). +} /** * For a single-sampled render target (non-MSRT), determine if an MCS buffer - * can be used. + * can be used. This doesn't (and should not) inquire about the BO properties of + * the buffer. The comment stutters and confuses me, because acronym expansion results in: This doesn't (and should not) inquire about the buffer object properties of the buffer. Which is equivalent to: This doesn't (and should not) inquire about the buffer's buffer object properties. Instead of referring the buffer's buffer object properties, I think it's much clearer to refer to the properties of the miptree's BO: This doesn't (and should not) inspect any properties of the miptree's BO. Other than the naming and documentation nitpicks, the code itself looks good to me. I like the clean separation the patch introduces. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags
On Fri 29 May 2015, Matt Turner wrote: On Thu, May 28, 2015 at 10:21 AM, Ben Widawsky @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt-logical_height0 = height0; mt-logical_depth0 = depth0; mt-fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; - mt-disable_aux_buffers = disable_aux_buffers; + mt-disable_aux_buffers = !!(layout_flags MIPTREE_LAYOUT_DISABLE_AUX); FWIW, I much prefer (x y) != 0 to !!(x y). Matt, in the C code you've encountered in the wild, do you feel that `(x y) != 0` is more prevalent than `!!(x y)`? I'm curious, because we should probably choose the idiom which is more recognizable. For the record, I slightly prefer !! because I've encountered it often in idiomatic Python, but it really doesn't matter to me. I suspect that != 0 may be the more common idiom in C. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 12/15] egl: add eglCreateImage
On Thu 28 May 2015, Marek Olšák wrote: A new patch is attached. Please review. LGTM. Reviewed-by: Chad Versace chad.vers...@intel.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Disable compaction for EOT send messages
On Wed, May 27, 2015 at 10:16 PM, Ben Widawsky benjamin.widaw...@intel.com wrote: AFAICT, there is no real way to make sure a send message with EOT is properly ignored from compact, nor can I see a way to actually encode EOT while compacting. Before the single send optimization we'd always bail because we hit the is_immediate !is_compactable_immediate case. However, with single send, is_immediate is not true, and so we end up trying to compact the un-compactible. I investigated, and yeah, your analysis is exactly right. The EOT bit (bit 127) is unused when an instruction doesn't have an immediate, so the compaction code doesn't read it in that case. So I think this patch (with the checking for SEND/SENDC opcodes, since they're the only instructions that can have EOT) is exactly what we want. Reviewed-by: Matt Turner matts...@gmail.com Thanks! ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] PIPE_SHADER_* vs TGSI_PROCESSOR_*
Sometimes, having to translate from one to the other seems like unnecessary annoyance. Marek On Fri, May 29, 2015 at 3:59 PM, Brian Paul bri...@vmware.com wrote: On 05/28/2015 05:36 PM, Marek Olšák wrote: Hi, Would people be okay with removing the TGSI_PROCESSOR_ enums and replacing all usages with PIPE_SHADER_*? It's kind of nice that the TGSI shader stuff is self-contained with no real dependency on gallium. This would change that in a tiny way. Is this causing pain? Would adjusting the TSGI/PIPE enum values so they match (VS/FS are mixed up) be of any help? -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965: Don't add base_binding_table_index if it's zero
On Fri, May 29, 2015 at 02:53:35PM +0100, Neil Roberts wrote: When calculating the binding table index for non-constant sampler array indexing it needs to add the base binding table index which is a constant within the generated code. Often this base is zero so we can avoid a redundant instruction in that case. It looks like nothing in shader-db is doing non-constant sampler array indexing so this patch doesn't make any difference but it might be worth having anyway. --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index ea46b1a..40a3db3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -789,7 +789,8 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src /* addr = ((sampler * 0x101) + base_binding_table_index) 0xfff */ brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); - brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + if (base_binding_table_index) + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 1d3f5ed..cf1aa83 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -409,7 +409,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, /* addr = ((sampler * 0x101) + base_binding_table_index) 0xfff */ brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); - brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + if (base_binding_table_index) + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); Both are: Acked-by: Ben Widawsky b...@bwidawsk.net (More knowledgeable people have left their review already, I just want it merged ASAP). -- Ben Widawsky, Intel Open Source Technology Center ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Don't use a temporary when generating an indirect sample
On Fri, May 29, 2015 at 6:53 AM, Neil Roberts n...@linux.intel.com wrote: Previously when generating the send instruction for a sample instruction with an indirect sampler it would use the destination register as a temporary store. This breaks when used in combination with the opt_sampler_eot optimisation because that forces the destination to be null. This patch fixes that by avoiding the temp register altogether. The reason the temporary register was needed was because it was trying to ensure the binding table index doesn't overflow a byte by and'ing it with 0xff. The result is then or'd with samper_index8. This patch instead just and's the whole thing by 0xfff. This will ensure that a bogus sampler index won't overflow into the rest of the message descriptor but unlike the previous code it won't ensure that the binding table index doesn't overflow into the sampler index. It doesn't seem like that should matter very much though because if the shader is generating a bogus sampler index then it's going to just get garbage out either way. Instead of doing sampler_index8|(sampler_index+base_table_index) the new code avoids one operation by doing sampler_index*0x101+base_table_index which should be equivalent. However if we wanted to avoid the multiply for some reason we could do this by adding an extra or instruction still without needing the temporary register. This fixes a number of Piglit tests on Skylake that were using indirect samplers such as: spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 17 - src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 0be0f86..ea46b1a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src brw_mark_surface_used(prog_data, sampler + base_binding_table_index); } else { /* Non-const sampler index */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* Some care required: `sampler` and `temp` may alias: - *addr = sampler 0xff - *temp = (sampler 8) 0xf00 - *addr = addr | temp - */ - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); - brw_OR(p, addr, addr, temp); + /* addr = ((sampler * 0x101) + base_binding_table_index) 0xfff */ + brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101)); + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 20d096c..1d3f5ed 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -398,10 +398,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_mark_surface_used(prog_data-base, sampler + base_binding_table_index); } else { /* Non-constant sampler index. */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); @@ -409,16 +407,10 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* Some care required: `sampler` and `temp` may alias: - *addr = sampler 0xff - *temp = (sampler 8) 0xf00 - *addr = addr | temp - */ - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); - brw_OR(p, addr, addr, temp); + /* addr = ((sampler * 0x101) + base_binding_table_index)