Mesa (master): i965/reg: Make brw_sr0_reg take a subnr and return a vec1 reg
Module: Mesa Branch: master Commit: a2392cee48076f1fe6feab7d49214990cfa6a551 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2392cee48076f1fe6feab7d49214990cfa6a551 Author: Jason Ekstrand Date: Wed Sep 14 15:09:32 2016 -0700 i965/reg: Make brw_sr0_reg take a subnr and return a vec1 reg The state register sr0 is really a collection of dwords not a SIMD8 anything. It's much more convenient for brw_sr0_reg to return the particular dword you're looking for rather than a giant blob you have to massage into what you want. Signed-off-by: Jason Ekstrand [ Francisco Jerez: Trivial simplification of brw_ud1_reg(). ] Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_reg.h | 20 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index d026bbd..5c44007 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -6185,7 +6185,7 @@ fs_visitor::run_cs() if (devinfo->is_haswell && prog_data->total_shared > 0) { /* Move SLM index from g0.0[27:24] to sr0.1[11:8] */ const fs_builder abld = bld.exec_all().group(1, 0); - abld.MOV(retype(suboffset(brw_sr0_reg(), 1), BRW_REGISTER_TYPE_UW), + abld.MOV(retype(brw_sr0_reg(1), BRW_REGISTER_TYPE_UW), suboffset(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW), 1)); } diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index d6f22ed..b71c63b 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -567,6 +567,12 @@ brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) } static inline struct brw_reg +brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) +{ + return retype(brw_vec1_reg(file, nr, subnr), BRW_REGISTER_TYPE_UD); +} + +static inline struct brw_reg brw_imm_reg(enum brw_reg_type type) { return brw_reg(BRW_IMMEDIATE_VALUE, @@ -789,19 +795,9 @@ 
brw_notification_reg(void) } static inline struct brw_reg -brw_sr0_reg(void) +brw_sr0_reg(unsigned subnr) { - return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_STATE, - 0, - 0, - 0, - BRW_REGISTER_TYPE_UD, - BRW_VERTICAL_STRIDE_8, - BRW_WIDTH_8, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_STATE, subnr); } static inline struct brw_reg ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): Revert "Revert "mapi: export all GLES 3.2 functions in libGLESv2.so""
Module: Mesa Branch: master Commit: 811eb7f178b8b85ac299121ac09a3180b9b55da2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=811eb7f178b8b85ac299121ac09a3180b9b55da2 Author: Francisco Jerez Date: Tue Oct 18 20:44:10 2016 -0700 Revert "Revert "mapi: export all GLES 3.2 functions in libGLESv2.so"" This reverts commit 85e9bbc14d93fa7166c9ae075ee7ae29a8313e3f. The previous commit should help with the scons build failure caused by the original commit. Cc: mesa-sta...@lists.freedesktop.org Reviewed-by: Dylan Baker --- src/mapi/glapi/gen/static_data.py | 12 1 file changed, 12 insertions(+) diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index 2f403e9..25e78bf 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -484,17 +484,22 @@ functions = [ "BindVertexBuffer", "BindVertexBuffers", "Bitmap", +"BlendBarrier", "BlendColor", "BlendColorEXT", "BlendEquation", "BlendEquationEXT", +"BlendEquationi", "BlendEquationiARB", "BlendEquationSeparate", +"BlendEquationSeparatei", "BlendEquationSeparateiARB", "BlendFunc", +"BlendFunci", "BlendFunciARB", "BlendFuncSeparate", "BlendFuncSeparateEXT", +"BlendFuncSeparatei", "BlendFuncSeparateiARB", "BlitFramebuffer", "BufferData", @@ -825,6 +830,7 @@ functions = [ "GetFramebufferAttachmentParameteriv", "GetFramebufferAttachmentParameterivEXT", "GetFramebufferParameteriv", +"GetGraphicsResetStatus", "GetGraphicsResetStatusARB", "GetHandleARB", "GetHistogram", @@ -864,8 +870,11 @@ functions = [ "GetnSeparableFilterARB", "GetnTexImageARB", "GetnUniformdvARB", +"GetnUniformfv", "GetnUniformfvARB", +"GetnUniformiv", "GetnUniformivARB", +"GetnUniformuiv", "GetnUniformuivARB", "GetObjectLabel", "GetObjectParameterfvARB", @@ -1160,6 +1169,7 @@ functions = [ "Orthof", "Orthox", "PassThrough", +"PatchParameteri", "PauseTransformFeedback", "PixelMapfv", "PixelMapuiv", @@ -1191,6 +1201,7 @@ functions = [ "PopDebugGroup", "PopMatrix", "PopName", +"PrimitiveBoundingBox", 
"PrimitiveRestartIndex", "PrimitiveRestartIndexNV", "PrimitiveRestartNV", @@ -1273,6 +1284,7 @@ functions = [ "RasterPos4s", "RasterPos4sv", "ReadBuffer", +"ReadnPixels", "ReadnPixelsARB", "ReadPixels", "Rectd", ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glapi: Move PrimitiveBoundingBox and BlendBarrier definitions into ES3.2 category.
Module: Mesa Branch: master Commit: 15a084a03998c5c86206137fdaf6f43b5f98485a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=15a084a03998c5c86206137fdaf6f43b5f98485a Author: Francisco Jerez Date: Tue Oct 18 14:53:20 2016 -0700 glapi: Move PrimitiveBoundingBox and BlendBarrier definitions into ES3.2 category. These two GLES 3.2 entry points were being defined in the category of the ARB_ES3_2_compatibility and KHR_blend_equation_advanced extensions respectively instead of in the ES3.2 category. Defining them in the ES3.2 category makes sure that the gl_procs.py generator emits declarations in the glprocs.h header file for the unsuffixed GLES-only entry points that PrimitiveBoundingBoxARB and BlendBarrierKHR respectively alias. This should avoid a compilation failure during scons builds in combination with "mapi: export all GLES 3.2 functions in libGLESv2.so". Cc: mesa-sta...@lists.freedesktop.org Reviewed-by: Dylan Baker --- src/mapi/glapi/gen/gl_API.xml | 30 +- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index 5998ccf..00c9bb7 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -8296,6 +8296,23 @@ http://www.w3.org/2001/XInclude"/> + + + + + + + + + + + + + + + + @@ -8316,7 +8333,6 @@ - @@ -8332,18 +8348,6 @@ - - - - - - - - - - - ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Pass unquoted compiler arguments to Clang
Module: Mesa Branch: master Commit: e3272865c216933168e6c08766d266a33d0e1497 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e3272865c216933168e6c08766d266a33d0e1497 Author: Vedran Miletić Date: Wed Sep 28 17:11:43 2016 +0200 clover: Pass unquoted compiler arguments to Clang OpenCL apps can quote arguments they pass to the OpenCL compiler, most commonly include paths containing spaces. If the Clang OpenCL compiler was called via a shell, the shell would split the arguments with respect to quotes and then remove quotes before passing the arguments to the compiler. Since we call Clang as a library, we have to split the argument with respect to quotes and then remove quotes before passing the arguments. v2: move to tokenize(), remove throwing of CL_INVALID_COMPILER_OPTIONS v3: simplify parsing logic, use more C++11 v4: restore error throwing, clarify a comment Signed-off-by: Vedran Miletić Reviewed-by: Francisco Jerez --- src/gallium/state_trackers/clover/llvm/util.hpp | 40 ++--- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/src/gallium/state_trackers/clover/llvm/util.hpp b/src/gallium/state_trackers/clover/llvm/util.hpp index 8db6f20..222becd 100644 --- a/src/gallium/state_trackers/clover/llvm/util.hpp +++ b/src/gallium/state_trackers/clover/llvm/util.hpp @@ -24,6 +24,7 @@ #ifndef CLOVER_LLVM_UTIL_HPP #define CLOVER_LLVM_UTIL_HPP +#include "core/error.hpp" #include "util/u_debug.h" #include @@ -42,11 +43,42 @@ namespace clover { inline std::vector tokenize(const std::string &s) { std::vector ss; - std::istringstream iss(s); - std::string t; + std::ostringstream oss; - while (getline(iss, t, ' ')) -ss.push_back(t); + // OpenCL programs can pass a quoted argument, most frequently the + // include path. This is useful so that path containing spaces is + // treated as a single argument instead of being split by the spaces. + // Additionally, the argument should also be unquoted before being + // passed to the compiler. 
We avoid using std::string::replace here to + // remove quotes, as the single and double quote characters can be a + // part of the file name. + bool escape_next = false; + bool in_quote_double = false; + bool in_quote_single = false; + + for (auto c : s) { +if (escape_next) { + oss.put(c); + escape_next = false; +} else if (c == '\\') { + escape_next = true; +} else if (c == '"' && !in_quote_single) { + in_quote_double = !in_quote_double; +} else if (c == '\'' && !in_quote_double) { + in_quote_single = !in_quote_single; +} else if (c != ' ' || in_quote_single || in_quote_double) { + oss.put(c); +} else if (oss.tellp() > 0) { + ss.emplace_back(oss.str()); + oss.str(""); +} + } + + if (oss.tellp() > 0) +ss.emplace_back(oss.str()); + + if (in_quote_double || in_quote_single) +throw invalid_build_options_error(); return ss; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Implement clGetExtensionFunctionAddressForPlatform.
Module: Mesa Branch: master Commit: cb0879985a40bcde1516e5341c5a3e5ea0968b87 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb0879985a40bcde1516e5341c5a3e5ea0968b87 Author: Serge Martin Date: Sun Sep 27 11:15:14 2015 +0200 clover: Implement clGetExtensionFunctionAddressForPlatform. Add clGetExtensionFunctionAddressForPlatform (CL 1.2). Reviewed-by: Francisco Jerez --- src/gallium/state_trackers/clover/api/dispatch.cpp | 2 +- src/gallium/state_trackers/clover/api/dispatch.hpp | 4 src/gallium/state_trackers/clover/api/platform.cpp | 16 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/api/dispatch.cpp b/src/gallium/state_trackers/clover/api/dispatch.cpp index f10babe..8f4cfdc 100644 --- a/src/gallium/state_trackers/clover/api/dispatch.cpp +++ b/src/gallium/state_trackers/clover/api/dispatch.cpp @@ -131,7 +131,7 @@ namespace clover { clEnqueueMigrateMemObjects, clEnqueueMarkerWithWaitList, clEnqueueBarrierWithWaitList, - NULL, // clGetExtensionFunctionAddressForPlatform + GetExtensionFunctionAddressForPlatform, NULL, // clCreateFromGLTexture NULL, // clGetDeviceIDsFromD3D11KHR NULL, // clCreateFromD3D11BufferKHR diff --git a/src/gallium/state_trackers/clover/api/dispatch.hpp b/src/gallium/state_trackers/clover/api/dispatch.hpp index 7f62282..0ec1b51 100644 --- a/src/gallium/state_trackers/clover/api/dispatch.hpp +++ b/src/gallium/state_trackers/clover/api/dispatch.hpp @@ -777,6 +777,10 @@ namespace clover { void * GetExtensionFunctionAddress(const char *p_name); + void * + GetExtensionFunctionAddressForPlatform(cl_platform_id d_platform, + const char *p_name); + cl_int IcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id *rd_platforms, cl_uint *rnum_platforms); diff --git a/src/gallium/state_trackers/clover/api/platform.cpp b/src/gallium/state_trackers/clover/api/platform.cpp index b1b1fdf..ed86163 100644 --- a/src/gallium/state_trackers/clover/api/platform.cpp +++ 
b/src/gallium/state_trackers/clover/api/platform.cpp @@ -92,6 +92,16 @@ clover::GetPlatformInfo(cl_platform_id d_platform, cl_platform_info param, } void * +clover::GetExtensionFunctionAddressForPlatform(cl_platform_id d_platform, + const char *p_name) try { + obj(d_platform); + return GetExtensionFunctionAddress(p_name); + +} catch (error &e) { + return NULL; +} + +void * clover::GetExtensionFunctionAddress(const char *p_name) { std::string name { p_name }; @@ -118,6 +128,12 @@ clGetExtensionFunctionAddress(const char *p_name) { return GetExtensionFunctionAddress(p_name); } +CLOVER_ICD_API void * +clGetExtensionFunctionAddressForPlatform(cl_platform_id d_platform, + const char *p_name) { + return GetExtensionFunctionAddressForPlatform(d_platform, p_name); +} + CLOVER_ICD_API cl_int clIcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id *rd_platforms, cl_uint *rnum_platforms) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Introduce CLOVER_EXTRA_*_OPTIONS environment variables
Module: Mesa Branch: master Commit: 2fba72046da09dd28f54df02794b358773899d13 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2fba72046da09dd28f54df02794b358773899d13 Author: Vedran Miletić Date: Wed Sep 28 16:18:24 2016 +0200 clover: Introduce CLOVER_EXTRA_*_OPTIONS environment variables The options specified in the CLOVER_EXTRA_BUILD_OPTIONS shell variable are appended to the options specified by the OpenCL program in the clBuildProgram function call, if any. Analogously, the options specified in the CLOVER_EXTRA_COMPILE_OPTIONS and CLOVER_EXTRA_LINK_OPTIONS variables are appended to the options specified in clCompileProgram and clLinkProgram function calls, respectively. v2: * rename to CLOVER_EXTRA_COMPILER_OPTIONS * use debug_get_option * append to linker options as well v3: code cleanups v4: separate CLOVER_EXTRA_LINKER_OPTIONS options v5: * fix documentation typo * use CLOVER_EXTRA_COMPILER_OPTIONS in link stage v6: * separate in CLOVER_EXTRA_{BUILD,COMPILE,LINK}_OPTIONS * append options in cl{Build,Compile,Link}Program Signed-off-by: Vedran Miletić Reviewed-by[v1]: Edward O'Callaghan v7 [Francisco Jerez]: Slight simplification. Reviewed-by: Francisco Jerez --- docs/envvars.html | 15 +++ src/gallium/state_trackers/clover/api/program.cpp | 10 +++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/docs/envvars.html b/docs/envvars.html index cf57ca5..af1a30c 100644 --- a/docs/envvars.html +++ b/docs/envvars.html @@ -235,6 +235,21 @@ Setting to "tgsi", for example, will print all the TGSI shaders. See src/mesa/state_tracker/st_debug.c for other options. +Clover state tracker environment variables + + +CLOVER_EXTRA_BUILD_OPTIONS - allows specifying additional compiler and linker +options. Specified options are appended after the options set by the OpenCL +program in clBuildProgram. +CLOVER_EXTRA_COMPILE_OPTIONS - allows specifying additional compiler +options. 
Specified options are appended after the options set by the OpenCL +program in clCompileProgram. +CLOVER_EXTRA_LINK_OPTIONS - allows specifying additional linker +options. Specified options are appended after the options set by the OpenCL +program in clLinkProgram. + + + Softpipe driver environment variables SOFTPIPE_DUMP_FS - if set, the softpipe driver will print fragment shaders diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp index c3f9cb9..ba4ce7a 100644 --- a/src/gallium/state_trackers/clover/api/program.cpp +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -22,6 +22,7 @@ #include "api/util.hpp" #include "core/program.hpp" +#include "util/u_debug.h" #include @@ -177,7 +178,8 @@ clBuildProgram(cl_program d_prog, cl_uint num_devs, auto &prog = obj(d_prog); auto devs = (d_devs ? objs(d_devs, num_devs) : ref_vector(prog.context().devices())); - auto opts = (p_opts ? p_opts : ""); + const auto opts = std::string(p_opts ? p_opts : "") + " " + + debug_get_option("CLOVER_EXTRA_BUILD_OPTIONS", ""); validate_build_common(prog, num_devs, d_devs, pfn_notify, user_data); @@ -202,7 +204,8 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs, auto &prog = obj(d_prog); auto devs = (d_devs ? objs(d_devs, num_devs) : ref_vector(prog.context().devices())); - auto opts = (p_opts ? p_opts : ""); + const auto opts = std::string(p_opts ? p_opts : "") + " " + + debug_get_option("CLOVER_EXTRA_COMPILE_OPTIONS", ""); header_map headers; validate_build_common(prog, num_devs, d_devs, pfn_notify, user_data); @@ -271,7 +274,8 @@ clLinkProgram(cl_context d_ctx, cl_uint num_devs, const cl_device_id *d_devs, void (*pfn_notify) (cl_program, void *), void *user_data, cl_int *r_errcode) try { auto &ctx = obj(d_ctx); - auto opts = (p_opts ? p_opts : ""); + const auto opts = std::string(p_opts ? 
p_opts : "") + " " + + debug_get_option("CLOVER_EXTRA_LINK_OPTIONS", ""); auto progs = objs(d_progs, num_progs); auto prog = create(ctx); auto devs = validate_link_devices(progs, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform().
Module: Mesa Branch: master Commit: f3d387867f74ae758b41168f23992671f7dce254 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3d387867f74ae758b41168f23992671f7dce254 Author: Francisco Jerez Date: Tue Nov 1 11:56:13 2016 -0700 nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform(). Assuming the hardware is set up to use a screen coordinate system flipped vertically with respect to the GL's window coordinate system, the SYSTEM_VALUE_SAMPLE_POS vector will also be flipped vertically with respect to the value expected by the GL, so we need to give it the same treatment as gl_FragCoord. Fixes the following CTS tests on i965: ES31-CTS.functional.shaders.multisample_interpolation.interpolate_at_offset.at_sample_position.default_framebuffer ES31-CTS.functional.shaders.sample_variables.sample_pos.correctness.default_framebuffer when run with any multisample configuration, e.g. rgbad24s8ms4. Cc: Reviewed-by: Kenneth Graunke Reviewed-by: Anuj Phogat --- src/compiler/nir/nir_lower_wpos_ytransform.c | 24 1 file changed, 24 insertions(+) diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c b/src/compiler/nir/nir_lower_wpos_ytransform.c index 173f058..f211c73 100644 --- a/src/compiler/nir/nir_lower_wpos_ytransform.c +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c @@ -273,6 +273,26 @@ lower_interp_var_at_offset(lower_wpos_ytransform_state *state, } static void +lower_load_sample_pos(lower_wpos_ytransform_state *state, + nir_intrinsic_instr *intr) +{ + nir_builder *b = &state->b; + b->cursor = nir_after_instr(&intr->instr); + + nir_ssa_def *pos = &intr->dest.ssa; + nir_ssa_def *scale = nir_channel(b, get_transform(state), 0); + nir_ssa_def *neg_scale = nir_channel(b, get_transform(state), 2); + /* Either y or 1-y for scale equal to 1 or -1 respectively. 
*/ + nir_ssa_def *flipped_y = + nir_fadd(b, nir_fmax(b, neg_scale, nir_imm_float(b, 0.0)), +nir_fmul(b, nir_channel(b, pos, 1), scale)); + nir_ssa_def *flipped_pos = nir_vec2(b, nir_channel(b, pos, 0), flipped_y); + + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, nir_src_for_ssa(flipped_pos), + flipped_pos->parent_instr); +} + +static void lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block) { nir_foreach_instr_safe(instr, block) { @@ -287,6 +307,10 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block /* gl_FragCoord should not have array/struct deref's: */ assert(dvar->deref.child == NULL); lower_fragcoord(state, intr); +} else if (var->data.mode == nir_var_system_value && + var->data.location == SYSTEM_VALUE_SAMPLE_POS) { + assert(dvar->deref.child == NULL); + lower_load_sample_pos(state, intr); } } else if (intr->intrinsic == nir_intrinsic_interp_var_at_offset) { lower_interp_var_at_offset(state, intr); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Restore support for LLVM <= 3.9.
Module: Mesa Branch: master Commit: 95ddb37708ca16ccbd0f607d17a82be2de0d07b6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=95ddb37708ca16ccbd0f607d17a82be2de0d07b6 Author: Vedran Miletić Date: Tue Nov 22 20:25:34 2016 +0100 clover: Restore support for LLVM <= 3.9. The commit 8e430ff8b060b4e8e922bae24b3c57837da6ea77 broke support for LLVM 3.9 and older versions in Clover. This patch restores it and refactors the support using Clover compatibility layer for LLVM. v2: merged #ifdef blocks v3: added support for LLVM 3.6-3.8 v4: add missing #ifdef around v5: simplify using templates and lambda Signed-off-by: Vedran Miletić Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98740 Tested-by[v4]: Pierre Moreau Tested-by: Vinson Lee Reviewed-by: Francisco Jerez Reviewed-by: Jan Vesely --- .../state_trackers/clover/llvm/codegen/bitcode.cpp | 9 +++-- src/gallium/state_trackers/clover/llvm/compat.hpp | 18 ++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp b/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp index 5dcc4f8..d09207b 100644 --- a/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp +++ b/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp @@ -32,6 +32,7 @@ /// #include "llvm/codegen.hpp" +#include "llvm/compat.hpp" #include "llvm/metadata.hpp" #include "core/error.hpp" #include "util/algorithm.hpp" @@ -99,13 +100,9 @@ clover::llvm::parse_module_library(const module &m, ::llvm::LLVMContext &ctx, auto mod = ::llvm::parseBitcodeFile(::llvm::MemoryBufferRef( as_string(m.secs[0].data), " "), ctx); - if (::llvm::Error err = mod.takeError()) { - std::string msg; - ::llvm::handleAllErrors(std::move(err), [&](::llvm::ErrorInfoBase &EIB) { - msg = EIB.message(); - fail(r_log, error(CL_INVALID_PROGRAM), msg.c_str()); + compat::handle_module_error(mod, [&](const std::string &s) { + fail(r_log, error(CL_INVALID_PROGRAM), s); }); - } return 
std::unique_ptr<::llvm::Module>(std::move(*mod)); } diff --git a/src/gallium/state_trackers/clover/llvm/compat.hpp b/src/gallium/state_trackers/clover/llvm/compat.hpp index a963cff..81592ce 100644 --- a/src/gallium/state_trackers/clover/llvm/compat.hpp +++ b/src/gallium/state_trackers/clover/llvm/compat.hpp @@ -39,6 +39,11 @@ #include #include #include +#if HAVE_LLVM >= 0x0400 +#include +#else +#include +#endif #if HAVE_LLVM >= 0x0307 #include @@ -158,6 +163,19 @@ namespace clover { #else const auto default_reloc_model = ::llvm::Reloc::Default; #endif + + template void + handle_module_error(M &mod, const F &f) { +#if HAVE_LLVM >= 0x0400 +if (::llvm::Error err = mod.takeError()) + ::llvm::handleAllErrors(std::move(err), [&](::llvm::ErrorInfoBase &eib) { + f(eib.message()); + }); +#else +if (!mod) + f(mod.getError().message()); +#endif + } } } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Factor out oword block read and write message control calculation.
Module: Mesa Branch: master Commit: 7a6aadb76ff3f6ef73216b53b0dc5edda5bae978 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a6aadb76ff3f6ef73216b53b0dc5edda5bae978 Author: Francisco Jerez Date: Thu Dec 8 19:58:25 2016 -0800 i965: Factor out oword block read and write message control calculation. We'll need roughly the same logic in other places and it would be annoying to duplicate it. Instead factor it out into a function-like macro that takes the number of dwords per block (which will prove more convenient than taking the same value in owords or some other unit). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_defines.h | 6 ++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 14 ++ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index b1b6248..1875380 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1669,6 +1669,12 @@ enum brw_message_target { #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 #define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 #define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 +#define BRW_DATAPORT_OWORD_BLOCK_DWORDS(n) \ + ((n) == 4 ? BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW :\ +(n) == 8 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS : \ +(n) == 16 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS : \ +(n) == 32 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : \ +(abort(), ~0)) #define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 #define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS2 diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 341f543..6141bfb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2056,11 +2056,6 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, mrf = retype(mrf, BRW_REGISTER_TYPE_UD); const unsigned mlen = 1 + num_regs; - const unsigned msg_control = - (num_regs == 1 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS : - num_regs == 2 ? 
BRW_DATAPORT_OWORD_BLOCK_4_OWORDS : - num_regs == 4 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : 0); - assert(msg_control); /* Set up the message header. This is g0, with g0.2 filled with * the offset. We don't want to leave our offset around in g0 or @@ -2134,7 +2129,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, brw_set_dp_write_message(p, insn, brw_scratch_surface_idx(p), - msg_control, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), msg_type, target_cache, mlen, @@ -2181,11 +2176,6 @@ brw_oword_block_read_scratch(struct brw_codegen *p, dest = retype(dest, BRW_REGISTER_TYPE_UW); const unsigned rlen = num_regs; - const unsigned msg_control = - (num_regs == 1 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS : - num_regs == 2 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS : - num_regs == 4 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : 0); - assert(msg_control); const unsigned target_cache = (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE : @@ -,7 +2212,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p, brw_set_dp_read_message(p, insn, brw_scratch_surface_idx(p), - msg_control, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ target_cache, 1, /* msg_length */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/disasm: Decode dataport constant cache control fields.
Module: Mesa Branch: master Commit: fd3120d85c295eeeb3b6c9a60372506ae48f5fdb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fd3120d85c295eeeb3b6c9a60372506ae48f5fdb Author: Francisco Jerez Date: Thu Dec 8 22:14:59 2016 -0800 i965/disasm: Decode dataport constant cache control fields. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_disasm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 5e51be7..5930e44 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -1410,6 +1410,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo, } break; case GEN6_SFID_DATAPORT_SAMPLER_CACHE: + case GEN6_SFID_DATAPORT_CONSTANT_CACHE: /* aka BRW_SFID_DATAPORT_READ on Gen4-5 */ if (devinfo->gen >= 6) { format(file, " (%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64")", ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/gen6+: Invalidate constant cache on brw_emit_mi_flush().
Module: Mesa Branch: master Commit: 591e14ec08b13e8d50636feb1afa578257175b9d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=591e14ec08b13e8d50636feb1afa578257175b9d Author: Francisco Jerez Date: Thu Dec 8 18:00:17 2016 -0800 i965/gen6+: Invalidate constant cache on brw_emit_mi_flush(). In order to make sure that the constant cache is coherent with previous rendering when we start using it for pull constant loads. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_pipe_control.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index dd426bf..b8f7406 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -351,6 +351,7 @@ brw_emit_mi_flush(struct brw_context *brw) int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH; if (brw->gen >= 6) { flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | + PIPE_CONTROL_CONST_CACHE_INVALIDATE | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Let the caller of brw_set_dp_write/read_message control the target cache.
Module: Mesa Branch: master Commit: 3c78d31374422b028b19afa5799689c404a5b73e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c78d31374422b028b19afa5799689c404a5b73e Author: Francisco Jerez Date: Thu Apr 23 14:36:16 2015 +0300 i965: Let the caller of brw_set_dp_write/read_message control the target cache. brw_set_dp_read_message already had a target_cache argument, but its interpretation was rather convoluted (on Gen6 the render cache was used if the caller asked for it, otherwise it was ignored using the sampler cache instead), and the constant cache wasn't representable at all. brw_set_dp_write_message used the data cache on Gen7+ except for RENDER_TARGET_WRITE messages, in which case it would use the render cache. On Gen6 the render cache was always used. Instead of the above, provide the shared unit SFID that the caller expects will be used. Makes no functional changes. v3: Non-trivial rebase. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c | 69 +++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 -- 3 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 737a335..c44896b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -233,6 +233,7 @@ void brw_set_dp_write_message(struct brw_codegen *p, unsigned binding_table_index, unsigned msg_control, unsigned msg_type, + unsigned target_cache, unsigned msg_length, bool header_present, unsigned last_render_target, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index ca04221..72b6df6 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -706,6 +706,7 @@ brw_set_dp_write_message(struct brw_codegen *p, unsigned binding_table_index, unsigned msg_control, unsigned msg_type, + unsigned target_cache, unsigned msg_length, bool 
header_present, unsigned last_render_target, @@ -714,20 +715,8 @@ brw_set_dp_write_message(struct brw_codegen *p, unsigned send_commit_msg) { const struct gen_device_info *devinfo = p->devinfo; - unsigned sfid; - - if (devinfo->gen >= 7) { - /* Use the Render Cache for RT writes; otherwise use the Data Cache */ - if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) -sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - else -sfid = GEN7_SFID_DATAPORT_DATA_CACHE; - } else if (devinfo->gen == 6) { - /* Use the render cache for all write messages. */ - sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - } else { - sfid = BRW_SFID_DATAPORT_WRITE; - } + const unsigned sfid = (devinfo->gen >= 6 ? target_cache : + BRW_SFID_DATAPORT_WRITE); brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, header_present, end_of_thread); @@ -753,26 +742,8 @@ brw_set_dp_read_message(struct brw_codegen *p, unsigned response_length) { const struct gen_device_info *devinfo = p->devinfo; - unsigned sfid; - - if (devinfo->gen >= 7) { - if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) - sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - else if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE) - sfid = GEN7_SFID_DATAPORT_DATA_CACHE; - else if (target_cache == BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE) - sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; - else - unreachable("Invalid target cache"); - - } else if (devinfo->gen == 6) { - if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) -sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - else -sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; - } else { - sfid = BRW_SFID_DATAPORT_READ; - } + const unsigned sfid = (devinfo->gen >= 6 ? 
target_cache : + BRW_SFID_DATAPORT_READ); brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, header_present, false); @@ -2073,6 +2044,10 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, unsigned offset) { const struct gen_device_info *devinfo = p->devinfo; + const unsigned target_cache = + (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : + devinfo->gen >= 6 ? GEN6_SFID_DAT
Mesa (master): i965/fs: Drop useless access mode override from pull constant generator code.
Module: Mesa Branch: master Commit: e014058195540a3e54085903821beca70f8f2ec5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e014058195540a3e54085903821beca70f8f2ec5 Author: Francisco Jerez Date: Thu Dec 8 19:08:33 2016 -0800 i965/fs: Drop useless access mode override from pull constant generator code. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 93f4c41..db61d8e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1175,7 +1175,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_access_mode(p, BRW_ALIGN_1); /* a0.0 = surf_index & 0xff */ brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); @@ -1311,7 +1310,6 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_access_mode(p, BRW_ALIGN_1); /* a0.0 = surf_index & 0xff */ brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/fs: Fetch one cacheline of pull constants at a time.
Module: Mesa Branch: master Commit: b56fa830c6095f8226456b2aeb62f2dfad804be5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b56fa830c6095f8226456b2aeb62f2dfad804be5 Author: Francisco Jerez Date: Thu Dec 8 19:18:00 2016 -0800 i965/fs: Fetch one cacheline of pull constants at a time. Asking the DC for less than one cacheline (4 owords) of data for uniform pull constants is suboptimal because the DC cannot request less than that from L3, resulting in wasted bandwidth and unnecessary message dispatch overhead, and exacerbating the IVB L3 serialization bug. The following table summarizes the overall framerate improvement (with statistical significance of 5% and sample size ~10) from the whole series up to this patch for several benchmarks and hardware generations: | SKL | BDW | HSW SynMark2 OglShMapPcf | 24.63% ±0.45% | 4.01% ±0.70% | 10.31% ±0.38% GfxBench4 gl_manhattan31 | 5.93% ±0.35% | 3.92% ±0.31% | 6.62% ±0.22% GfxBench4 gl_4 | 2.52% ±0.44% | 1.23% ±0.10% | N/A Unigine Valley | 0.83% ±0.17% | 0.23% ±0.05% | 0.74% ±0.45% Note that there are two versions of the Manhattan demo shipped with GfxBench4, one of them is the original gl_manhattan demo which doesn't use UBOs, so this patch will have no effect on it, and another one is the gl_manhattan31 demo based on GL 4.3/GLES 3.1, which this patch benefits as shown above. I haven't observed any statistically significant regressions in the benchmarks I have at hand. Note that the comparatively huge improvement on SKL in the OglShMapPcf test case is due to the combined effect of this patch and the register pressure benefit on SKL+ of "i965/fs: Switch to the constant cache for uniform pull constants.", part of the same series. Going up to 8 oword blocks would improve performance of pull constants even more, but at the cost of some additional bandwidth and register pressure, so it would have to be done on-demand based on the number of constants actually used by the shader. v2: Fix for Gen4 and 5. 
v3: Non-trivial rebase. Rework to allow the visitor to specify arbitrary pull constant block sizes. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 21 + src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +--- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 977fd8c..671b44b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2111,25 +2111,22 @@ fs_visitor::lower_constant_loads() if (pull_index == -1) continue; - const unsigned index = stage_prog_data->binding_table.pull_constants_start; - fs_reg dst; - - if (type_sz(inst->src[i].type) <= 4) -dst = vgrf(glsl_type::float_type); - else -dst = vgrf(glsl_type::double_type); - assert(inst->src[i].stride == 0); - const fs_builder ubld = ibld.exec_all().group(4, 0); - struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); + const unsigned index = stage_prog_data->binding_table.pull_constants_start; + const unsigned block_sz = 64; /* Fetch one cacheline at a time. */ + const fs_builder ubld = ibld.exec_all().group(block_sz / 4, 0); + const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD); + const unsigned base = pull_index * 4; + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - dst, brw_imm_ud(index), offset); + dst, brw_imm_ud(index), brw_imm_ud(base & ~(block_sz - 1))); /* Rewrite the instruction to use the temporary VGRF. 
*/ inst->src[i].file = VGRF; inst->src[i].nr = dst.nr; - inst->src[i].offset = (pull_index & 3) * 4 + inst->src[i].offset % 4; + inst->src[i].offset = (base & (block_sz - 1)) + + inst->src[i].offset % 4; brw_mark_surface_used(prog_data, index); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 7df7423..9f2729a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -4059,21 +4059,23 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr * and we have to split it if necessary. */ const unsigned type_size = type_sz(dest.type); - const fs_builder ubld = bld.exec_all().group(4, 0); - const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F); + const unsigned block_sz = 64; /* Fetch one cacheline at a time. */ + const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0); + const fs_reg packed_consts = ubld.vgrf(BRW_REGI
Mesa (master): i965/fs: Remove the FS_OPCODE_SET_SIMD4X2_OFFSET virtual opcode.
Module: Mesa Branch: master Commit: 23caf75182d010a60e2d8c8633acaacb3e7c065d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=23caf75182d010a60e2d8c8633acaacb3e7c065d Author: Francisco Jerez Date: Wed Apr 22 21:37:46 2015 +0300 i965/fs: Remove the FS_OPCODE_SET_SIMD4X2_OFFSET virtual opcode. Not used anymore. It was just a scalar MOV. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_defines.h| 1 - src/mesa/drivers/dri/i965/brw_fs.h | 3 --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27 -- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 -- 4 files changed, 33 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 1875380..a07d307 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1119,7 +1119,6 @@ enum opcode { FS_OPCODE_MOV_DISPATCH_TO_FLAGS, FS_OPCODE_DISCARD_JUMP, FS_OPCODE_SET_SAMPLE_ID, - FS_OPCODE_SET_SIMD4X2_OFFSET, FS_OPCODE_PACK_HALF_2x16_SPLIT, FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 941c05f..d0e272b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -442,9 +442,6 @@ private: struct brw_reg src0, struct brw_reg src1); - void generate_set_simd4x2_offset(fs_inst *inst, -struct brw_reg dst, -struct brw_reg offset); void generate_discard_jump(fs_inst *inst); void generate_pack_half_2x16_split(fs_inst *inst, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index db61d8e..aed3c72 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1379,29 +1379,6 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst, inst->size_written / REG_SIZE); } - -/** - * Sets the first word of a vgrf for gen7+ simd4x2 uniform pull constant - * sampler LD messages. 
- * - * We don't want to bake it into the send message's code generation because - * that means we don't get a chance to schedule the instructions. - */ -void -fs_generator::generate_set_simd4x2_offset(fs_inst *inst, - struct brw_reg dst, - struct brw_reg value) -{ - assert(value.file == BRW_IMMEDIATE_VALUE); - - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value); - brw_pop_insn_state(p); -} - /* Sets vstride=1, width=4, hstride=0 of register src1 during * the ADD instruction. */ @@ -2004,10 +1981,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_memory_fence(p, dst); break; - case FS_OPCODE_SET_SIMD4X2_OFFSET: - generate_set_simd4x2_offset(inst, dst, src[0]); - break; - case SHADER_OPCODE_FIND_LIVE_CHANNEL: { const struct brw_reg mask = brw_stage_has_packed_dispatch(devinfo, stage, diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 25f745d..afab4aa 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -363,8 +363,6 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case FS_OPCODE_SET_SAMPLE_ID: return "set_sample_id"; - case FS_OPCODE_SET_SIMD4X2_OFFSET: - return "set_simd4x2_offset"; case FS_OPCODE_PACK_HALF_2x16_SPLIT: return "pack_half_2x16_split"; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/fs: Expose arbitrary pull constant load sizes to the IR.
Module: Mesa Branch: master Commit: 9b22a0d295316b7547667ebbfe1e1b6182439186 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b22a0d295316b7547667ebbfe1e1b6182439186 Author: Francisco Jerez Date: Thu Dec 8 20:05:18 2016 -0800 i965/fs: Expose arbitrary pull constant load sizes to the IR. Change the FS generator to ask the dataport for enough owords worth of constants to fill the execution size of the instruction -- Which means that the visitor now needs to set the execution size correctly for uniform pull constant load instructions, which we were kind of neglecting until now. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_eu_emit.c| 15 +++--- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27 -- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 9 + 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6141bfb..8536a13 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2256,7 +2256,7 @@ gen7_block_read_scratch(struct brw_codegen *p, } /** - * Read a float[4] vector from the data port constant cache. + * Read float[4] vectors from the data port constant cache. * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. */ @@ -2270,6 +2270,7 @@ void brw_oword_block_read(struct brw_codegen *p, const unsigned target_cache = (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE : BRW_DATAPORT_READ_TARGET_DATA_CACHE); + const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current); /* On newer hardware, offset is in units of owords. 
*/ if (devinfo->gen >= 6) @@ -2278,11 +2279,12 @@ void brw_oword_block_read(struct brw_codegen *p, mrf = retype(mrf, BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ @@ -2291,6 +2293,7 @@ void brw_oword_block_read(struct brw_codegen *p, mrf.nr, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(offset)); + brw_pop_insn_state(p); brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); @@ -2305,15 +2308,13 @@ void brw_oword_block_read(struct brw_codegen *p, brw_inst_set_base_mrf(devinfo, insn, mrf.nr); } - brw_set_dp_read_message(p, - insn, - bind_table_index, - BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + brw_set_dp_read_message(p, insn, bind_table_index, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size), BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, target_cache, 1, /* msg_length */ true, /* header_present */ - 1); /* response_length (1 reg, 2 owords!) 
*/ + DIV_ROUND_UP(exec_size, 8)); /* response_length */ brw_pop_insn_state(p); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b22dc9a..977fd8c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2121,7 +2121,7 @@ fs_visitor::lower_constant_loads() assert(inst->src[i].stride == 0); - const fs_builder ubld = ibld.exec_all().group(8, 0); + const fs_builder ubld = ibld.exec_all().group(4, 0); struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, dst, brw_imm_ud(index), offset); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 8b9fa8e..93f4c41 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1127,6 +1127,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg index, struct brw_reg offset) { + assert(type_sz(dst.type) == 4); assert(inst->mlen != 0); assert(index.file == BRW_IMMEDIATE_VALUE && @@ -1149,27 +1150,25 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, { assert(index.type == BRW_REGISTER_TYPE_UD); assert(pa
Mesa (master): i965/fs: Switch to the constant cache for uniform pull constants.
Module: Mesa Branch: master Commit: ad38ba113491869ab0dffed937f7b3dd50e8a735 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ad38ba113491869ab0dffed937f7b3dd50e8a735 Author: Francisco Jerez Date: Wed Oct 26 14:25:06 2016 -0700 i965/fs: Switch to the constant cache for uniform pull constants. This reverts to using the oword block read messages for uniform pull constant loads, as used to be the case until 4c1fdae0a01b3f92ec03b61aac1d3df5. There are two important differences though: Now the L3 cacheability bits are set up correctly for UBOs (since 11f5d8a5d4fbb861ec161f68593e429cbd65d1cd), and we target the constant cache instead of the data cache. The latter used to get no L3 way allocation on boot on all platforms that existed at the time, so oword read messages wouldn't get cached on L3 regardless of the MOCS bits, which probably explains the apparent slowness of oword fetches. Constant cache loads seem to perform better than SIMD4x2 sampler loads in a number of cases, they alleviate some of the cache thrashing caused by the competition with textures for the L1/L2 sampler caches, and they allow fetching up to 128B worth of constants with a single oword fetch message. Note that IVB devices suffer from a hardware bug that leads to serialization of L3 read requests overlapping the same cacheline as a result of a mechanism of the L3 (buggy on IVB) to preserve coherency. Since read requests for matching cachelines from any L3 client are not pipelined, throughput may decrease in cases where there are no non-overlapping requests left in the queue that can be processed between them. This situation should be relatively uncommon as long as we make sure that we don't use the 1/2 oword messages in cases where the shader intends to read from any other location of the same cacheline at some other point. 
This is generally a good idea anyway on all generations because using the 1 and 2 oword messages is expected to waste bandwidth since the minimum L3 request size for the DC is exactly 4 owords (i.e. one cacheline). A future commit will have this effect. I haven't been able to find any real-world example where this would still result in a regression on IVB, but if someone happens to find one it shouldn't be too difficult to add an IVB-specific check to have it fall back to the sampler cache for pull constant loads. Note that on SKL+ this change has the additional benefit of reducing the register footprint of pull constant loads. The following table summarizes the effect of the whole series on several shader-db stats: Total instructions Total cycles BWR: 4571248 -> 4568342 (-0.06%) 123375740 -> 123373296 (-0.00%) ELK: 3989020 -> 3985402 (-0.09%) 98757068 -> 98754058 (-0.00%) ILK: 6383591 -> 6376787 (-0.11%) 143649910 -> 143648914 (-0.00%) SNB: 7528395 -> 7501446 (-0.36%) 103503796 -> 102460370 (-1.01%) IVB: 6949221 -> 6943317 (-0.08%) 60592262 -> 60584422 (-0.01%) HSW: 6409753 -> 6403702 (-0.09%) 60609070 -> 60604414 (-0.01%) BDW: 8043467 -> 7976364 (-0.83%) 68427730 -> 68483042 (0.08%) CHV: 8045019 -> 7977916 (-0.83%) 68297426 -> 68352756 (0.08%) SKL: 8204037 -> 7939086 (-3.23%) 66583900 -> 65624378 (-1.44%) Lost->Gained Total spills Total fills BWR: 5 -> 5 1488 -> 1488 (0.00%) 1957 -> 1957 (0.00%) ELK: 5 -> 5 1489 -> 1489 (0.00%) 1958 -> 1958 (0.00%) ILK: 1 -> 4 1449 -> 1449 (0.00%) 1921 -> 1921 (0.00%) SNB: 0 -> 0 549 -> 549 (0.00%) 52 -> 52 (0.00%) IVB: 13 -> 3 1271 -> 1271 (0.00%) 1162 -> 1162 (0.00%) HSW: 11 -> 0 1271 -> 1271 (0.00%) 1162 -> 1162 (0.00%) BDW: 12 -> 0 1340 -> 1340 (0.00%) 1452 -> 1452 (0.00%) CHV: 12 -> 0 1340 -> 1340 (0.00%) 1452 -> 1452 (0.00%) SKL: 0 -> 120 1269 -> 375 (-70.45%) 1563 -> 690 (-55.85%) v3: Non-trivial rebase. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_eu_emit.c| 5 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 42 +++--- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 78 +- 4 files changed, 36 insertions(+), 91 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 72b6df6..341f543 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2266,7 +2266,7 @@ gen7_block_read_scratch(struct brw_codegen *p, } /** - * Read a float[4] vector from the data port Data Cache (const buffer). + * Read a float[4] vector from the data port constant cache. * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. */ @@ -2278,8 +2278,7 @@ void brw_oword_block_read(struct brw_codegen *p, { const struct gen_device_info *devinfo = p->de
Mesa (master): anv: Fix uniform and storage buffer offset alignment limits.
Module: Mesa Branch: master Commit: 79d08ed3d21bef21881303f320706ebb2098a50a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=79d08ed3d21bef21881303f320706ebb2098a50a Author: Francisco Jerez Date: Thu Dec 15 13:34:02 2016 -0800 anv: Fix uniform and storage buffer offset alignment limits. This fixes a regression in a bunch of image store vulkan CTS tests from commit ad38ba113491869ab0dffed937f7b3dd50e8a735, which started using OWORD block read messages to implement UBO loads. The reason for the failure is that we were giving bogus buffer alignment limits to the application (1B), so the CTS would happily come back with descriptor sets pointing at not even word-aligned uniform buffer addresses. Surprisingly the sampler messages used to fetch pull constants before that commit were able to cope with the non-texel aligned addresses, but the dataport messages used to fetch pull constants after that commit and the ones used to access storage buffers (before and after the same commit) aren't as permissive with unaligned addresses. Cc: Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99097 Reported-by: Mark Janes Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index e3d278d..9245e5c 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -582,8 +582,8 @@ void anv_GetPhysicalDeviceProperties( .viewportSubPixelBits = 13, /* We take a float? */ .minMemoryMapAlignment= 4096, /* A page */ .minTexelBufferOffsetAlignment= 1, - .minUniformBufferOffsetAlignment = 1, - .minStorageBufferOffsetAlignment = 1, + .minUniformBufferOffsetAlignment = 16, + .minStorageBufferOffsetAlignment = 4, .minTexelOffset = -8, .maxTexelOffset = 7, .minTexelGatherOffset = -32, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Use Clang's diagnostics
Module: Mesa Branch: master Commit: d9fef848a651b47520cbeb72c38b93d4fbf842a8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d9fef848a651b47520cbeb72c38b93d4fbf842a8 Author: Vedran Miletić Date: Wed Dec 21 13:49:36 2016 +0100 clover: Use Clang's diagnostics Presently errors from frontend are handled only if they occur in clang::CompilerInvocation::CreateFromArgs(). This patch uses clang::DiagnosticsEngine to detect errors such as invalid values for Clang frontend arguments. Fixes Piglit's cl/program/build/fail/invalid-version-declaration.cl test. v2: fix inconsistent code formatting Signed-off-by: Vedran Miletić Reviewed-by: Francisco Jerez Tested-by: Aaron Watry --- src/gallium/state_trackers/clover/llvm/invocation.cpp | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 675cf19..f63ff3d 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -98,8 +98,9 @@ namespace { const std::vector &opts, std::string &r_log) { std::unique_ptr c { new clang::CompilerInstance }; + clang::TextDiagnosticBuffer *diag_buffer = new clang::TextDiagnosticBuffer; clang::DiagnosticsEngine diag { new clang::DiagnosticIDs, -new clang::DiagnosticOptions, new clang::TextDiagnosticBuffer }; +new clang::DiagnosticOptions, diag_buffer }; // Parse the compiler options. 
A file name should be present at the end // and must have the .cl extension in order for the CompilerInvocation @@ -111,6 +112,10 @@ namespace { c->getInvocation(), copts.data(), copts.data() + copts.size(), diag)) throw invalid_build_options_error(); + diag_buffer->FlushDiagnostics(diag); + if (diag.hasErrorOccurred()) + throw invalid_build_options_error(); + c->getTargetOpts().CPU = target.cpu; c->getTargetOpts().Triple = target.triple; c->getLangOpts().NoBuiltin = true; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit