Re: [Mesa-dev] [PATCH 1/5] radeonsi: remove fast color clear for single-sample buffers
For the series (1-3) Tested-by: Dieter Nützel Are these numbers OK? Triangle,Radeon RX 580 Series (POLARIS10 / DRM 3.25.0 / 4.16.0-rc1-1.g7262353-default+, LLVM 7.0.0),3.1 Mesa 18.1.0-devel (git-a8cc051d2e),1920,1080,YES,Off,5240,6,0,314471 Or should I retest without this series? Dieter Am 11.03.2018 19:11, schrieb Marek Olšák: From: Marek Olšák This should improve the score for the GpuTest Triangle benchmark. Vulkan doesn't use this either. --- src/gallium/drivers/radeon/r600_pipe_common.h | 1 - src/gallium/drivers/radeon/r600_texture.c | 11 +--- src/gallium/drivers/radeonsi/si_clear.c | 37 ++- src/gallium/drivers/radeonsi/si_state.c | 6 - 4 files changed, 3 insertions(+), 52 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 7941903..9701757 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -209,21 +209,20 @@ struct r600_cmask_info { struct r600_texture { struct r600_resourceresource; struct radeon_surf surface; uint64_tsize; struct r600_texture *flushed_depth_texture; /* Colorbuffer compression and fast clear. */ struct r600_fmask_info fmask; struct r600_cmask_info cmask; - struct r600_resource*cmask_buffer; uint64_tdcc_offset; /* 0 = disabled */ unsignedcb_color_info; /* fast clear enable bit */ unsignedcolor_clear_value[2]; unsignedlast_msaa_resolve_target_micro_mode; unsignednum_level0_transfers; /* Depth buffer compression and fast clear. 
*/ uint64_thtile_offset; float depth_clear_value; uint16_t dirty_level_mask; /* each bit says if that mipmap is compressed */ diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 125e7ef..03bc955 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -405,26 +405,22 @@ void si_texture_discard_cmask(struct si_screen *sscreen, { if (!rtex->cmask.size) return; assert(rtex->resource.b.b.nr_samples <= 1); /* Disable CMASK. */ memset(&rtex->cmask, 0, sizeof(rtex->cmask)); rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8; rtex->dirty_level_mask = 0; - rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1); - if (rtex->cmask_buffer != &rtex->resource) - r600_resource_reference(&rtex->cmask_buffer, NULL); - /* Notify all contexts about the change. */ p_atomic_inc(&sscreen->dirty_tex_counter); p_atomic_inc(&sscreen->compressed_colortex_counter); } static bool r600_can_disable_dcc(struct r600_texture *rtex) { /* We can't disable DCC if it can be written by another process. */ return rtex->dcc_offset && (!rtex->resource.b.is_shared || @@ -813,24 +809,20 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, slice_size, whandle); } static void r600_texture_destroy(struct pipe_screen *screen, struct pipe_resource *ptex) { struct r600_texture *rtex = (struct r600_texture*)ptex; struct r600_resource *resource = &rtex->resource; r600_texture_reference(&rtex->flushed_depth_texture, NULL); - - if (rtex->cmask_buffer != &rtex->resource) { - r600_resource_reference(&rtex->cmask_buffer, NULL); - } pb_reference(&resource->buf, NULL); r600_resource_reference(&rtex->dcc_separate_buffer, NULL); r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL); FREE(rtex); } static const struct u_resource_vtbl r600_texture_vtbl; /* The number of samples can be specified independently of the texture. 
*/ void si_texture_get_fmask_info(struct si_screen *sscreen, @@ -1262,21 +1254,20 @@ r600_texture_create_object(struct pipe_screen *screen, rtex->db_compatible = true; if (!(sscreen->debug_flags & DBG(NO_HYPERZ))) r600_texture_allocate_htile(sscreen, rtex); } } else { if (base->nr_samples > 1) { if (!buf) { r600_texture_allocate_fmask(sscreen, rtex); r600_texture_allocate_cmask(sscreen, rtex); - rtex->cmask_buffer = &rtex->resource; } if (!rtex->fmask.size || !rtex->cmask.size
Re: [Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc
Yup, most definitely. I just have one more thing to test before sending out a V2. I've toyed around with arrays and sets and stuff to see if there are better options than a linked list. At least for now the answer is: "no, there isn't", but I'm gonna test u_vector for this use later today to see if that is even better. Expect new patch this evening CET. 2018-03-14 20:58 GMT+01:00 Dieter Nützel : > Hello Thomas, > > is this useful even after '[Mesa-dev] [PATCH 0/2] V2: Use hash table cloning > in copy propagation' landed? > > I'm running both together with Dave's '[Mesa-dev] [PATCH] radv/winsys: > replace bo list searchs with a hash table.' patch. > > Dieter > > > Am 24.01.2018 08:33, schrieb Thomas Helland: >> >> 2018-01-21 23:58 GMT+01:00 Eric Anholt : >>> >>> Thomas Helland writes: >>> Also, allocate worklist_elem in groups of 20, to reduce the burden of allocation. Do not use rzalloc, as there is no need. This lets us drop the number of calls to ralloc from approximately 10% of all calls to ralloc(130 000 calls), down to a mere 2000 calls to ralloc_array_size. This cuts the runtime of shader-db by 1%, while at the same time reducing the number of stalled cycles, executed cycles, and executed instructions by about 1 % as reported by perf. I did a five-run benchmark pre and post and got a statistical variance less than 0.1% pre and post. This was with i965's ir validation polluting the benchmark, so the numbers are even better in release builds. Performance change as found with perf-diff: 4.74% -0.23% libc-2.26.so[.] _int_malloc 1.88% -0.21% libc-2.26.so[.] malloc 2.27% +0.16% libmesa_dri_drivers.so [.] match_value.part.7 2.95% -0.12% libc-2.26.so[.] _int_free +0.11% libmesa_dri_drivers.so [.] worklist_push 1.22% -0.08% libc-2.26.so[.] malloc_consolidate 0.16% -0.06% libmesa_dri_drivers.so [.] mark_live_cb 1.21% +0.06% libmesa_dri_drivers.so [.] match_expression.part.6 0.75% -0.05% libc-2.26.so[.] cfree@GLIBC_2.2.5 0.50% -0.05% libmesa_dri_drivers.so [.] 
ralloc_size 0.57% +0.04% libmesa_dri_drivers.so [.] nir_replace_instr 1.29% -0.04% libmesa_dri_drivers.so [.] unsafe_free >>> >>> >>> I'm curious, since a NIR instruction worklist seems like a generally >>> useful thing to have: >>> >>> Could nir_worklist.c keep the implementation of this? >>> >>> Also, I wonder if it wouldn't be even better to have a u_dynarray of >>> instructions in the worklist, with push/pop on the end of the array, and >>> a struct set tracking the instructions in the array to avoid >>> double-adding. I actually don't know if that would be better or not, so >>> I'd be happy with the worklist management just moved to nir_worklist.c. >> >> >> I'll look into this to see what I can do. nir_worklist.c at this time has >> only >> a block worklist. This numbers all the blocks, uses a bitset for checking >> if the item is present, and uses an array with an index pointing to the >> start of the queue of blocks in the buffer. >> >> The same scheme could be easily used for ssa-defs, as these are >> also numbered. I actually did this for the VRP pass I wrote years ago. >> >> However, for instructions we do not have a way of numbering them, >> so a different scheme would have to be used. A dynarray + set type >> of thing, as you're suggesting, might get us where we want. >> I'll see what I can come up with. >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v4 12/18] i965/blorp: Update the fast clear color address.
For the subject, instead of 'address', what about something like: i965/blorp: Update the fast clear value buffer On 2018-03-08 08:49:05, Rafael Antognolli wrote: > On Gen10, whenever we do a fast clear, blorp will update the clear color > state buffer for us, as long as we set the clear color address > correctly. > > However, on a hiz clear, if the surface is already on the fast clear > state we skip the actual fast clear operation and, before gen10, only > updated the miptree. On gen10+ we need to update the clear value state > buffer too, since blorp will not be doing a fast clear and updating it > for us. > > v4: > - do not use clear_value_size in the for loop > - Get the address of the clear color from the aux buffer or the > clear_color_bo, depending on which one is available. > - let core blorp update the clear color, but also update it when we > skip a fast clear depth. > > Signed-off-by: Rafael Antognolli > --- > src/mesa/drivers/dri/i965/brw_blorp.c | 11 +++ > src/mesa/drivers/dri/i965/brw_clear.c | 22 ++ > 2 files changed, 33 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c > b/src/mesa/drivers/dri/i965/brw_blorp.c > index ffd957fb866..914aeeace7a 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp.c > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c > @@ -185,6 +185,17 @@ blorp_surf_for_miptree(struct brw_context *brw, > >surf->aux_addr.buffer = aux_buf->bo; >surf->aux_addr.offset = aux_buf->offset; > + > + if (devinfo->gen >= 10) { > + /* If we have a CCS surface and clear_color_bo set, use that bo as > + * storage for the indirect clear color. Otherwise, use the extra > + * space at the end of the aux_buffer. 
> + */ > + surf->clear_color_addr = (struct blorp_address) { > +.buffer = aux_buf->clear_color_bo, > +.offset = aux_buf->clear_color_offset, > + }; > + } > } else { >surf->aux_addr = (struct blorp_address) { > .buffer = NULL, > diff --git a/src/mesa/drivers/dri/i965/brw_clear.c > b/src/mesa/drivers/dri/i965/brw_clear.c > index 8aa83722ee9..63c0b241898 100644 > --- a/src/mesa/drivers/dri/i965/brw_clear.c > +++ b/src/mesa/drivers/dri/i965/brw_clear.c > @@ -108,6 +108,7 @@ brw_fast_clear_depth(struct gl_context *ctx) > struct intel_mipmap_tree *mt = depth_irb->mt; > struct gl_renderbuffer_attachment *depth_att = > &fb->Attachment[BUFFER_DEPTH]; > const struct gen_device_info *devinfo = &brw->screen->devinfo; > + bool same_clear_value = true; > > if (devinfo->gen < 6) >return false; > @@ -213,6 +214,7 @@ brw_fast_clear_depth(struct gl_context *ctx) >} > >intel_miptree_set_depth_clear_value(ctx, mt, clear_value); > + same_clear_value = false; > } > > bool need_clear = false; > @@ -232,6 +234,26 @@ brw_fast_clear_depth(struct gl_context *ctx) > * state then simply updating the miptree fast clear value is > sufficient > * to change their clear value. > */ > + if (devinfo->gen >= 10 && !same_clear_value) { > + /* Before gen10, it was enough to just update the clear value in the > + * miptree. But on gen10+, we let blorp update the clear value state > + * buffer when doing a fast clear. Since we are skipping the fast > + * clear here, we need to update the clear color ourselves. > + */ > + uint32_t clear_offset = mt->hiz_buf->clear_color_offset; > + union isl_color_value clear_color = { .f32 = { clear_value, } }; > + > + /* We can't update the clear color while the hardware is still using > + * the previous one for a resolve or sampling from it. So make sure > + * that there's no pending commands at this point. 
> + */ > + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); I think we talked about potential performance concerns over the stall, but we decided it was probably unlikely that an application would clear the buffer multiple times with different values. I just wanted to mention it in case anyone else has other opinions on it. 11 - 12 Reviewed-by: Jordan Justen > + for (int i = 0; i < 4; i++) { > +brw_store_data_imm32(brw, mt->hiz_buf->clear_color_bo, > + clear_offset + i * 4, clear_color.u32[i]); > + } > + brw_emit_pipe_control_flush(brw, > PIPE_CONTROL_STATE_CACHE_INVALIDATE); > + } >return true; > } > > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] vbo: Correctly handle source arrays in vbo_split_copy.
Hi, On Wednesday, 14 March 2018 22:28:28 CET Brian Paul wrote: > Reviewed-by: Brian Paul Thanks, pushed! best Mathias ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC] nir: Add a deref instruction type
This commit adds a new instruction type to NIR for handling derefs. Nothing uses it yet but this adds the data structure as well as all of the code to validate, print, clone, and [de]serialize them. Cc: Rob Clark Cc: Connor Abbott --- This is not tested beyond compile testing. I'm sending it out ahead so that people can comment on the instruction data structure. I think this should handle all the SPIR-V use-cases fairly nicely as well as the use-cases we have today. src/compiler/nir/nir.c | 49 +++ src/compiler/nir/nir.h | 47 +- src/compiler/nir/nir_clone.c | 45 + src/compiler/nir/nir_print.c | 46 ++ src/compiler/nir/nir_serialize.c | 85 src/compiler/nir/nir_validate.c | 67 +++ 6 files changed, 338 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index a97b119..1023eb9 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -469,6 +469,26 @@ nir_alu_instr_create(nir_shader *shader, nir_op op) return instr; } +nir_deref_instr * +nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type) +{ + nir_deref_instr *instr = + rzalloc_size(shader, sizeof(nir_deref_instr)); + + instr_init(&instr->instr, nir_instr_type_deref); + + instr->deref_type = deref_type; + if (deref_type != nir_deref_type_var) + src_init(&instr->parent); + + if (deref_type == nir_deref_type_array_indirect) + src_init(&instr->arr.indirect); + + dest_init(&instr->dest); + + return instr; +} + nir_jump_instr * nir_jump_instr_create(nir_shader *shader, nir_jump_type type) { @@ -1198,6 +1218,12 @@ visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state) } static bool +visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state) +{ + return cb(&instr->dest, state); +} + +static bool visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb, void *state) { @@ -1238,6 +1264,8 @@ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state) switch (instr->type) { case nir_instr_type_alu: 
return visit_alu_dest(nir_instr_as_alu(instr), cb, state); + case nir_instr_type_deref: + return visit_deref_dest(nir_instr_as_deref(instr), cb, state); case nir_instr_type_intrinsic: return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state); case nir_instr_type_tex: @@ -1349,6 +1377,23 @@ visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state) } static bool +visit_deref_instr_src(nir_deref_instr *instr, + nir_foreach_src_cb cb, void *state) +{ + if (instr->deref_type != nir_deref_type_var) { + if (!visit_src(&instr->parent, cb, state)) + return false; + } + + if (instr->deref_type == nir_deref_type_array_indirect) { + if (!visit_src(&instr->arr.indirect, cb, state)) + return false; + } + + return true; +} + +static bool visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state) { for (unsigned i = 0; i < instr->num_srcs; i++) { @@ -1436,6 +1481,10 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) if (!visit_alu_src(nir_instr_as_alu(instr), cb, state)) return false; break; + case nir_instr_type_deref: + if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state)) + return false; + break; case nir_instr_type_intrinsic: if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state)) return false; diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 839d403..a40a3a0 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -421,6 +421,7 @@ typedef struct nir_register { typedef enum { nir_instr_type_alu, + nir_instr_type_deref, nir_instr_type_call, nir_instr_type_tex, nir_instr_type_intrinsic, @@ -888,7 +889,10 @@ bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, typedef enum { nir_deref_type_var, nir_deref_type_array, - nir_deref_type_struct + nir_deref_type_struct, + nir_deref_type_array_direct, + nir_deref_type_array_indirect, + nir_deref_type_array_wildcard, } nir_deref_type; typedef struct nir_deref { @@ -950,6 +954,42 @@ 
nir_deref_tail(nir_deref *deref) typedef struct { nir_instr instr; + /** The type of this deref instruction */ + nir_deref_type deref_type; + + /** The mode of the underlying variable */ + nir_variable_mode mode; + + /** The dereferenced type of the resulting pointer value */ + const struct glsl_type *type; + + union { + /** Variable being dereferenced if deref_type is a deref_var */ + nir_variable *var; + + /** Parent deref if deref_type is not deref_var */ + nir_src parent; + }; + + /** Addi
[Mesa-dev] [Bug 105464] Reading per-patch outputs in Tessellation Control Shader returns undefined values
https://bugs.freedesktop.org/show_bug.cgi?id=105464 --- Comment #6 from Clément Guérin --- I can confirm that the tessellation demo is broken without Philip's patch on mesa 03e37ec6d7 and llvm-svn 327550 on R9 Fury. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa 1/2] Add processor topology calculation implementation for Darwin/OSX targets.
Code looks good. I'm not able to test it, but it's isolated within #defined( __APPLE__). Thanks for the contribution. Reviewed-by: Bruce Cherniak > On Mar 14, 2018, at 6:19 PM, Jeremy Huddleston Sequoia > wrote: > > From: Apple SWE > > The implementation for bootstrapping SWR on Darwin targets is based on the > Linux version. > Instead of reading the output of /proc/cpuinfo, sysctlbyname is used to > determine the > physical identifiers, processor identifiers, core counts and thread-processor > affinities. > > With this patch, it is possible to use SWR as an alternate renderer on OSX to > softpipe and > llvmpipe. > > Reviewed-by: Jeremy Huddleston Sequoia > Signed-off-by: Jeremy Huddleston Sequoia > --- > .../drivers/swr/rasterizer/core/threads.cpp| 56 +- > 1 file changed, 55 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp > b/src/gallium/drivers/swr/rasterizer/core/threads.cpp > index 4d79168d2d..3eb20abcbf 100644 > --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp > +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp > @@ -36,6 +36,11 @@ > #include > #endif > > +#ifdef __APPLE__ > +#include > +#include > +#endif > + > #include "common/os.h" > #include "context.h" > #include "frontend.h" > @@ -219,6 +224,56 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, > uint32_t& out_numThread > > #elif defined(__APPLE__) > > +auto numProcessors = 0; > +auto numCores = 0; > +auto numPhysicalIds = 0; > + > +int value; > +size_t size = sizeof(value); > + > +int result = sysctlbyname("hw.packages", &value, &size, NULL, 0); > +SWR_ASSERT(result == 0); > +numPhysicalIds = value; > + > +result = sysctlbyname("hw.logicalcpu", &value, &size, NULL, 0); > +SWR_ASSERT(result == 0); > +numProcessors = value; > + > +result = sysctlbyname("hw.physicalcpu", &value, &size, NULL, 0); > +SWR_ASSERT(result == 0); > +numCores = value; > + > +out_nodes.resize(numPhysicalIds); > + > +for (auto physId = 0; physId < 
numPhysicalIds; ++physId) > +{ > +auto &numaNode = out_nodes[physId]; > +auto procId = 0; > + > +numaNode.cores.resize(numCores); > + > +while (procId < numProcessors) > +{ > +for (auto coreId = 0; coreId < numaNode.cores.size(); ++coreId, > ++procId) > +{ > +auto &core = numaNode.cores[coreId]; > + > +core.procGroup = coreId; > +core.threadIds.push_back(procId); > +} > +} > +} > + > +out_numThreadsPerProcGroup = 0; > + > +for (auto &node : out_nodes) > +{ > +for (auto &core : node.cores) > +{ > +out_numThreadsPerProcGroup += core.threadIds.size(); > +} > +} > + > #else > > #error Unsupported platform > @@ -253,7 +308,6 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, > uint32_t& out_numThread > } > } > > - > void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t > procGroupId = 0, bool bindProcGroup=false) > { > // Only bind threads when MAX_WORKER_THREADS isn't set. > -- > 2.16.1 (Apple Git-102) > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 04/50] glsl: Add "built-in" functions to do eq(fp64, fp64)
> Is a mix really warranted here? Could just use > return result && !(isaNaN || isbNaN) (since the other mix part is just > false I think mix is a bit overkill, albeit it might not really make a > difference). > Actually I think it should be simplified, you don't need to check both > vars for NaN (because if just one is NaN, the comparison(s) will be > false anyway). > so just return result && !isaNaN This saves 2 instructions, granted that is a minor drop in a very large ocean, but seems fine to just do it. Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa 2/2] sched.h needs to be imported on Darwin/OSX targets.
We don't currently build SWR on OS X, I've had difficulty building Mesa on OS X in general. But, I'd be very interested in learning. Thanks for the patch. Reviewed-by: Bruce Cherniak > On Mar 14, 2018, at 6:19 PM, Jeremy Huddleston Sequoia > wrote: > > From: Apple SWE > > sched_yield is used but the include reference on Darwin is missing. This patch > conditionally guards on Darwin/OSX to import sched.h first. > > Reviewed-by: Jeremy Huddleston Sequoia > Signed-off-by: Jeremy Huddleston Sequoia > --- > src/gallium/drivers/swr/swr_fence.cpp | 4 > 1 file changed, 4 insertions(+) > > diff --git a/src/gallium/drivers/swr/swr_fence.cpp > b/src/gallium/drivers/swr/swr_fence.cpp > index 3005eb9aaa..b05ac8cec0 100644 > --- a/src/gallium/drivers/swr/swr_fence.cpp > +++ b/src/gallium/drivers/swr/swr_fence.cpp > @@ -29,6 +29,10 @@ > #include "swr_screen.h" > #include "swr_fence.h" > > +#ifdef __APPLE__ > +#include <sched.h> > +#endif > + > #if defined(PIPE_CC_MSVC) // portable thread yield >#define sched_yield SwitchToThread > #endif > -- > 2.16.1 (Apple Git-102) > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] FLAG-DAY: NIR derefs
On Wed, Mar 14, 2018 at 8:44 PM, Connor Abbott wrote: > On Wed, Mar 14, 2018 at 6:07 PM, Rob Clark wrote: >> On Wed, Mar 14, 2018 at 7:42 PM, Connor Abbott wrote: >>> On Wed, Mar 14, 2018 at 5:05 PM, Rob Clark wrote: On Wed, Mar 14, 2018 at 4:58 PM, Connor Abbott wrote: > FWIW, the way I imagined doing this was something like: > > 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just > make everything assert if the base deref isn't a nir_deref_var. This > will be a bit of a flag-day, but also very mechanical. It'll also help > us catch cases where we don't handle new-style derefs later. > 2. Add a pass to flatten nir_deref_type_pointer into > nir_deref_type_var if possible (i.e. if there's a clear chain up to > the base variable without any phi nodes or whatever). This should > always be possible for GLSL, as well as SPIR-V unless > KHR_variable_pointers is enabled. We'll use this to avoid too much > churn in drivers, passes that haven't been updated, etc. We might also > want a pass to do the opposite, for converting passes where we don't > want to have codepaths for both forms at once. btw, does it seem reasonable to assert that deref instruction src's are *always* in SSA form? That seems reasonable to me since they will be mostly lowered away before the driver sees them (and I think makes some of the operation on them easier), and I can't think of any way for them *not* to be SSA (since they aren't real instructions). >>> >>> I think so... as long as you don't lower locals to regs before >>> lowering everything to explicit address arithmetic. Although, with the >>> physical memory model, it's just another source like any other so I'm >>> not sure if there's a point. >>> >> >> I think w/ phys memory model, we could lower away the deref's before >> going to regs. That *seems* like a reasonable requirement to me. 
>> If so, my rough thoughts are a deref instruction chain (formed by ssa links to previous deref instruction) either start w/ nir_deref_instr_pointer or nir_deref_instruction_var instructions at the head of the list (to start, I guess you could ignore adding the nir_deref_instr_pointer instruction and I could add that for clover/spirv work). Followed by N links of struct/array deref_link instructions that have two ssa src's (one that is previous deref instruction and one that is array or struct member offset) >>> >>> Why would you need a separate nir_deref_instr_pointer? Do you want to >>> put information like what type of pointer it is in there? Maybe we >>> could just make that part of every nir_deref_instr instead? >> >> well, in clc you could hypotheticaly do something like: >> >> __global struct Foo *f = (struct Foo *)0x1234; >> >> so you don't necessarily have a var at the start of your deref chain. >> >> More realistic example is: >> >> ptr->a.b->c.d >> >> which is really two deref chains, first starting at a var, second >> starting at an ssa ptr (which I think realistically ends up needing to >> be a fat pointer to deal w/ cl's multiple address spaces[1]), with an >> intermediate load_global or load_shared intrinsic in between. >> >> Anyways, don't want to derail the conversion to deref instructions too >> much, but I do think we need something different for "var" vs "ptr" >> (and the nice thing about deref chains is this should be easier to >> add) > > My point was that you don't really need a distinction, as long as > deref instructions can accept any old pointer. In your second example, > there would be a struct deref, a load, and then a second struct deref > using the result of the load. This is similar to how it's done in > LLVM. > I guess that comes down to how we define what a pointer is.. if it is abstract enough to deal with both logical pointers (ie. something that refers back to a var of some sort) or physical pointers, I guess that can work.. 
at this point I'm not too picky about the color of that bikeshed ;-) fwiw, on the topic of abstracting deref chains in prep of changing things: https://github.com/freedreno/mesa/commits/deref-chains I've gotten approx (I think) about half way thru going thru the intr->variables[n]->var callsites, and will pick it up in the morning. I think we need to do similar for nir_tex_instr (but that looks like only a few call-sites). But as long as we can assume deref chains are in SSA I think some refactoring like that up front will make the conversion easier (ie. accessors will be easy to convert over to new world order). BR, -R ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH shaderdb] run: -p option accepts hex format pci-id
-p option now takes hex format pci-id of target architecture. Signed-off-by: Dongwon Kim --- run.c | 35 +-- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/run.c b/run.c index 69e64c7..3db97ec 100644 --- a/run.c +++ b/run.c @@ -356,7 +356,8 @@ const struct platform platforms[] = { void print_usage(const char *prog_name) { fprintf(stderr, -"Usage: %s [-d ] [-j ] [-o ] [-p ] \n", +"Usage: %s [-d ] [-j ] [-o ] [-p ] \n", prog_name); } @@ -456,6 +457,7 @@ main(int argc, char **argv) break; case 'p': { const struct platform *platform = NULL; + for (unsigned i = 0; i < ARRAY_SIZE(platforms); i++) { if (strcasecmp(optarg, platforms[i].name) == 0) { platform = platforms + i; @@ -463,17 +465,30 @@ main(int argc, char **argv) } } -if (platform == NULL) { -fprintf(stderr, "Invalid platform.\nValid platforms are:"); -for (unsigned i = 0; i < ARRAY_SIZE(platforms); i++) -fprintf(stderr, " %s", platforms[i].name); -fprintf(stderr, "\n"); -return -1; +if (platform) { +printf("### Compiling for %s(PCI_ID=%s) ###\n", platform->name, + platform->pci_id); +setenv("INTEL_DEVID_OVERRIDE", platform->pci_id, 1); +break; } -printf("### Compiling for %s ###\n", platform->name); -setenv("INTEL_DEVID_OVERRIDE", platform->pci_id, 1); -break; +if (optarg[0] == '0' && optarg[1] == 'x') { +/* check if rest of given string indicates hex number */ +if (strtol(optarg, NULL, 16) > 0) { +setenv("INTEL_DEVID_OVERRIDE", optarg, 1); +printf("### Compiling for GEN arch with PCI_ID=%s ###\n", + optarg); +break; +} +} + +fprintf(stderr, "Invalid platform.\nValid platforms are:"); +for (unsigned i = 0; i < ARRAY_SIZE(platforms); i++) +fprintf(stderr, " %s", platforms[i].name); + +fprintf(stderr, "\n"); +fprintf(stderr, "Or\nPCI-ID of other supported platform.\n"); +return -1; } case 'j': max_threads = atoi(optarg); -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] FLAG-DAY: NIR derefs
On Wed, Mar 14, 2018 at 6:07 PM, Rob Clark wrote: > On Wed, Mar 14, 2018 at 7:42 PM, Connor Abbott wrote: >> On Wed, Mar 14, 2018 at 5:05 PM, Rob Clark wrote: >>> On Wed, Mar 14, 2018 at 4:58 PM, Connor Abbott wrote: FWIW, the way I imagined doing this was something like: 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just make everything assert if the base deref isn't a nir_deref_var. This will be a bit of a flag-day, but also very mechanical. It'll also help us catch cases where we don't handle new-style derefs later. 2. Add a pass to flatten nir_deref_type_pointer into nir_deref_type_var if possible (i.e. if there's a clear chain up to the base variable without any phi nodes or whatever). This should always be possible for GLSL, as well as SPIR-V unless KHR_variable_pointers is enabled. We'll use this to avoid too much churn in drivers, passes that haven't been updated, etc. We might also want a pass to do the opposite, for converting passes where we don't want to have codepaths for both forms at once. >>> >>> btw, does it seem reasonable to assert that deref instruction src's >>> are *always* in SSA form? That seems reasonable to me since they will >>> be mostly lowered away before the driver sees them (and I think makes >>> some of the operation on them easier), and I can't think of any way >>> for them *not* to be SSA (since they aren't real instructions). >> >> I think so... as long as you don't lower locals to regs before >> lowering everything to explicit address arithmetic. Although, with the >> physical memory model, it's just another source like any other so I'm >> not sure if there's a point. >> > > I think w/ phys memory model, we could lower away the deref's before > going to regs. That *seems* like a reasonable requirement to me. 
> >>> >>> If so, my rough thoughts are a deref instruction chain (formed by ssa >>> links to previous deref instruction) either start w/ >>> nir_deref_instr_pointer or nir_deref_instruction_var instructions at >>> the head of the list (to start, I guess you could ignore adding the >>> nir_deref_instr_pointer instruction and I could add that for >>> clover/spirv work). Followed by N links of struct/array deref_link >>> instructions that have two ssa src's (one that is previous deref >>> instruction and one that is array or struct member offset) >> >> Why would you need a separate nir_deref_instr_pointer? Do you want to >> put information like what type of pointer it is in there? Maybe we >> could just make that part of every nir_deref_instr instead? > > well, in clc you could hypotheticaly do something like: > > __global struct Foo *f = (struct Foo *)0x1234; > > so you don't necessarily have a var at the start of your deref chain. > > More realistic example is: > > ptr->a.b->c.d > > which is really two deref chains, first starting at a var, second > starting at an ssa ptr (which I think realistically ends up needing to > be a fat pointer to deal w/ cl's multiple address spaces[1]), with an > intermediate load_global or load_shared intrinsic in between. > > Anyways, don't want to derail the conversion to deref instructions too > much, but I do think we need something different for "var" vs "ptr" > (and the nice thing about deref chains is this should be easier to > add) My point was that you don't really need a distinction, as long as deref instructions can accept any old pointer. In your second example, there would be a struct deref, a load, and then a second struct deref using the result of the load. This is similar to how it's done in LLVM. > > BR, > -R > > [1] kinda a different topic.. 
short version is I'm leaning towards a > nir_deref_instr_pointer taking a two component vector as it's src so > it can be lowered to an if/else chain to deal with different address > spaces, and then let opt passes clean things up so driver ends up with > either load/store_global or load/store_local, etc > > >> >>> 3. Modify nir_lower_io to handle new-style derefs, especially for shared variables (i.e. KHR_variable_pointers for anv). We might have to modify a few other passes, too. 4. Add the required deref lowering passes to all drivers. 5. Rewrite glsl_to_nir and spirv_to_nir to emit the new-style derefs. At the very least, we should be using this to implement the shared variable bits of KHR_variable_pointers. If we add stride/offset annotations to nir_deref_instr for UBO's and SSBO's, then we might also be able to get rid of the vtn_deref stuff entirely (although I'm not sure if that should be a goal right now). >>> >>> I think I might try to prototype something where we convert vtn over >>> to new-style deref instructions, plus a pass to lower to old style >>> deref chains. It partly comes down to how quickly I can finish a >>> couple other things, and how much I can't sleep on a long-ass flight. >>> (I guess even if throw-away
Re: [Mesa-dev] [PATCH v4 10/18] i965/miptree: Add new BO for clear color.
What about a subject like this? i965/miptree: Add new clear color BO for winsys aux buffers On 2018-03-08 08:49:03, Rafael Antognolli wrote: > Add an extra BO to store clear color when we receive the aux buffer from > the window system. Since we have no control over the aux buffer size in > this case, we need the new BO to store only the clear color. > > Signed-off-by: Rafael Antognolli > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 18 ++ > 1 file changed, 18 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index 22d0ae89367..a8b89d9170a 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -969,6 +969,23 @@ create_ccs_buf_for_image(struct brw_context *brw, >return false; > } > > + /* On gen10+ we start using an extra space in the aux buffer to store the > +* indirect clear color. However, if we imported an image from the window > +* system with CCS, we don't have the extra space at the end of the aux > +* buffer. So create a new bo here that will store that clear color. > +*/ > + const struct gen_device_info *devinfo = &brw->screen->devinfo; > + if (devinfo->gen >= 10) { > + mt->mcs_buf->clear_color_bo = > + brw_bo_alloc(brw->bufmgr, "clear_color_bo", > + brw->isl_dev.ss.clear_color_state_size, 64); > + if (!mt->mcs_buf->clear_color_bo) { > + free(mt->mcs_buf); > + mt->mcs_buf = NULL; > + return false; > + } > + } > + > mt->mcs_buf->bo = image->bo; > brw_bo_reference(image->bo); > > @@ -1211,6 +1228,7 @@ intel_miptree_aux_buffer_free(struct > intel_miptree_aux_buffer *aux_buf) >return; > > brw_bo_unreference(aux_buf->bo); > + brw_bo_unreference(aux_buf->clear_color_bo); Should this be added in the previous patch? Should it only happen when gen >= 10? I guess it will be null for gen < 10, so this will be a no-op. 
-Jordan > > free(aux_buf); > } > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa 1/2] Add processor topology calculation implementation for Darwin/OSX targets.
Sent from my iPhone... > On Mar 14, 2018, at 16:41, Matt Turner wrote: > > Subject should have a swr prefix or similar. > > On Wed, Mar 14, 2018 at 4:19 PM, Jeremy Huddleston Sequoia > wrote: >> From: Apple SWE > > Explain? I didn’t author it. Author is an Apple Employee in SWE who does not want attribution. > >> >> The implementation for bootstrapping SWR on Darwin targets is based on the >> Linux version. >> Instead of reading the output of /proc/cpuinfo, sysctlbyname is used to >> determine the >> physical identifiers, processor identifiers, core counts and >> thread-processor affinities. >> >> With this patch, it is possible to use SWR as an alternate renderer on OSX >> to softpipe and >> llvmpipe. > > All of these look like they're too long to fit in 80 columns in git show. > >> Reviewed-by: Jeremy Huddleston Sequoia > > I'm guessing you're just pushing code someone else wrote... > >> Signed-off-by: Jeremy Huddleston Sequoia > > We don't have a DCO, so Signed-off-by has no meaning. Worse, in the > case we added a DCO in the future, all of the stupid Signed-off-by's > people have been cargo culting for years would confuse everything. > Please stop doing it. Ok. I’ll remove it. > > Please don't push code to maintained drivers without going through the > mailing list. I feel like I shouldn't have to say that. In the past there hasn’t been much care about code in __APPLE__, so I figured it wasn’t that big of a deal, but if you are interested in reviewing these changes, that’s great to hear. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] anv/pipeline: set active_stages early
Since the intermediate states of active_stages are not used, i.e. active_stages is read only after all stages were set into it, just set its value before compiling the shaders. This will allow to conditionally run certain passes based on what other shaders are being used, e.g. a certain pass might only be applicable to the vertex shader if there's no geometry or tessellation shader being used. --- src/intel/vulkan/anv_pipeline.c | 12 +--- src/intel/vulkan/genX_pipeline.c | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index cb34f3be77..fba0039240 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -501,7 +501,6 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, struct anv_shader_bin *shader) { pipeline->shaders[stage] = shader; - pipeline->active_stages |= mesa_to_vk_shader_stage(stage); } static VkResult @@ -1334,11 +1333,18 @@ anv_pipeline_init(struct anv_pipeline *pipeline, const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = {}; struct anv_shader_module *modules[MESA_SHADER_STAGES] = {}; for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1; + VkShaderStageFlagBits vk_stage = pCreateInfo->pStages[i].stage; + gl_shader_stage stage = ffs(vk_stage) - 1; pStages[stage] = &pCreateInfo->pStages[i]; modules[stage] = anv_shader_module_from_handle(pStages[stage]->module); + pipeline->active_stages |= vk_stage; } + if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) + pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + + assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT); + if (modules[MESA_SHADER_VERTEX]) { result = anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, modules[MESA_SHADER_VERTEX], @@ -1378,7 +1384,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, goto compile_fail; } - 
assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT); + assert(pipeline->shaders[MESA_SHADER_VERTEX]); anv_pipeline_setup_l3_config(pipeline, false); diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 9c08bc2033..eb2d414735 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1787,6 +1787,7 @@ compute_pipeline_create( pipeline->needs_data_cache = false; assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); + pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT; ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, pCreateInfo->stage.pName, -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] anv/pipeline: fail if tcs/tes compile fail
--- src/intel/vulkan/anv_pipeline.c | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 9cfd16df2a..cb34f3be77 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -1349,13 +1349,15 @@ anv_pipeline_init(struct anv_pipeline *pipeline, } if (modules[MESA_SHADER_TESS_EVAL]) { - anv_pipeline_compile_tcs_tes(pipeline, cache, pCreateInfo, - modules[MESA_SHADER_TESS_CTRL], - pStages[MESA_SHADER_TESS_CTRL]->pName, - pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo, - modules[MESA_SHADER_TESS_EVAL], - pStages[MESA_SHADER_TESS_EVAL]->pName, - pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo); + result = anv_pipeline_compile_tcs_tes(pipeline, cache, pCreateInfo, +modules[MESA_SHADER_TESS_CTRL], + pStages[MESA_SHADER_TESS_CTRL]->pName, + pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo, +modules[MESA_SHADER_TESS_EVAL], + pStages[MESA_SHADER_TESS_EVAL]->pName, + pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo); + if (result != VK_SUCCESS) + goto compile_fail; } if (modules[MESA_SHADER_GEOMETRY]) { -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v4 08/18] intel/blorp: Update clear color state buffer during fast clears.
On 2018-03-08 08:49:01, Rafael Antognolli wrote: > We always want to update the fast clear color during a fast clear on > i965. On anv, we doing that before a resolve, but by adding support to s/we/we are/ ? Patches 5 - 9: Reviewed-by: Jordan Justen > blorp, we can do a similar thing and update it during a fast clear > instead. > > The goal is to remove some code from anv that does such update, and > centralize everything in blorp, hopefully removing a lot of code > duplication. It also allows us to have a similar behavior on gen < 9 and > gen >= 10. > > Signed-off-by: Rafael Antognolli > --- > src/intel/blorp/blorp_genX_exec.h | 48 > +++ > 1 file changed, 48 insertions(+) > > diff --git a/src/intel/blorp/blorp_genX_exec.h > b/src/intel/blorp/blorp_genX_exec.h > index c68767a2faa..eef6ed8291a 100644 > --- a/src/intel/blorp/blorp_genX_exec.h > +++ b/src/intel/blorp/blorp_genX_exec.h > @@ -1642,6 +1642,51 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, > } > #endif > > +static void > +blorp_update_clear_color(struct blorp_batch *batch, > + const struct brw_blorp_surface_info *info, > + enum isl_aux_op op) > +{ > + if (info->clear_color_addr.buffer && op == ISL_AUX_OP_FAST_CLEAR) { > +#if GEN_GEN >= 9 > + for (int i = 0; i < 4; i++) { > + blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) { > +sdi.Address = info->clear_color_addr; > +sdi.Address.offset += i * 4; > +sdi.ImmediateData = info->clear_color.u32[i]; > + } > + } > +#elif GEN_GEN >= 7 > + blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) { > + sdi.Address = info->clear_color_addr; > + sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 | > + ISL_CHANNEL_SELECT_GREEN << 22 | > + ISL_CHANNEL_SELECT_BLUE << 19 | > + ISL_CHANNEL_SELECT_ALPHA << 16; > + if (isl_format_has_int_channel(info->view.format)) { > +for (unsigned i = 0; i < 4; i++) { > + assert(info->clear_color.u32[i] == 0 || > + info->clear_color.u32[i] == 1); > +} > +sdi.ImmediateData |= (info->clear_color.u32[0] != 0) << 31; > +sdi.ImmediateData |= 
(info->clear_color.u32[1] != 0) << 30; > +sdi.ImmediateData |= (info->clear_color.u32[2] != 0) << 29; > +sdi.ImmediateData |= (info->clear_color.u32[3] != 0) << 28; > + } else { > +for (unsigned i = 0; i < 4; i++) { > + assert(info->clear_color.f32[i] == 0.0f || > + info->clear_color.f32[i] == 1.0f); > +} > +sdi.ImmediateData |= (info->clear_color.f32[0] != 0.0f) << 31; > +sdi.ImmediateData |= (info->clear_color.f32[1] != 0.0f) << 30; > +sdi.ImmediateData |= (info->clear_color.f32[2] != 0.0f) << 29; > +sdi.ImmediateData |= (info->clear_color.f32[3] != 0.0f) << 28; > + } > + } > +#endif > + } > +} > + > /** > * \brief Execute a blit or render pass operation. > * > @@ -1654,6 +1699,9 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, > static void > blorp_exec(struct blorp_batch *batch, const struct blorp_params *params) > { > + blorp_update_clear_color(batch, &params->dst, params->fast_clear_op); > + blorp_update_clear_color(batch, &params->depth, params->hiz_op); > + > #if GEN_GEN >= 8 > if (params->hiz_op != ISL_AUX_OP_NONE) { >blorp_emit_gen8_hiz_op(batch, params); > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa 1/2] Add processor topology calculation implementation for Darwin/OSX targets.
On Wed, Mar 14, 2018 at 5:07 PM, Jeremy Sequoia wrote: >> Please don't push code to maintained drivers without going through the >> mailing list. I feel like I shouldn't have to say that. > > In the past there hasn’t been much care about code in __APPLE__, so I figured > it wasn’t that big of a deal, but if you are interested in reviewing these > changes, that’s great to hear. I'd be interested if it was for my driver, and I assume other maintainers feel the same way. I'd at least like to check for things like whether the added code is inside a function. :) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] configure: remove unneeded XCB_REQUIRED
On Thu, 2018-03-15 at 09:37 +1000, Dave Airlie wrote: > On 15 March 2018 at 09:29, Andres Gomez wrote: > > It is only used for dri3 and xcb-dri3 and xcb-present were already > > mandating the minimal version, which is incoherent with the xcb one. > > We are in the middle of a thread discussing this area already, > probably don't need > this patch at this stage. Let's solve the problem first. Ouch! I really did miss the thread. Thanks for the heads up. I'll drop this patch and will follow the thread. Thanks! -- Br, Andres ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] FLAG-DAY: NIR derefs
On Wed, Mar 14, 2018 at 7:42 PM, Connor Abbott wrote: > On Wed, Mar 14, 2018 at 5:05 PM, Rob Clark wrote: >> On Wed, Mar 14, 2018 at 4:58 PM, Connor Abbott wrote: >>> FWIW, the way I imagined doing this was something like: >>> >>> 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just >>> make everything assert if the base deref isn't a nir_deref_var. This >>> will be a bit of a flag-day, but also very mechanical. It'll also help >>> us catch cases where we don't handle new-style derefs later. >>> 2. Add a pass to flatten nir_deref_type_pointer into >>> nir_deref_type_var if possible (i.e. if there's a clear chain up to >>> the base variable without any phi nodes or whatever). This should >>> always be possible for GLSL, as well as SPIR-V unless >>> KHR_variable_pointers is enabled. We'll use this to avoid too much >>> churn in drivers, passes that haven't been updated, etc. We might also >>> want a pass to do the opposite, for converting passes where we don't >>> want to have codepaths for both forms at once. >> >> btw, does it seem reasonable to assert that deref instruction src's >> are *always* in SSA form? That seems reasonable to me since they will >> be mostly lowered away before the driver sees them (and I think makes >> some of the operation on them easier), and I can't think of any way >> for them *not* to be SSA (since they aren't real instructions). > > I think so... as long as you don't lower locals to regs before > lowering everything to explicit address arithmetic. Although, with the > physical memory model, it's just another source like any other so I'm > not sure if there's a point. > I think w/ phys memory model, we could lower away the deref's before going to regs. That *seems* like a reasonable requirement to me. 
>> >> If so, my rough thoughts are a deref instruction chain (formed by ssa >> links to previous deref instruction) either start w/ >> nir_deref_instr_pointer or nir_deref_instruction_var instructions at >> the head of the list (to start, I guess you could ignore adding the >> nir_deref_instr_pointer instruction and I could add that for >> clover/spirv work). Followed by N links of struct/array deref_link >> instructions that have two ssa src's (one that is previous deref >> instruction and one that is array or struct member offset) > > Why would you need a separate nir_deref_instr_pointer? Do you want to > put information like what type of pointer it is in there? Maybe we > could just make that part of every nir_deref_instr instead? well, in clc you could hypotheticaly do something like: __global struct Foo *f = (struct Foo *)0x1234; so you don't necessarily have a var at the start of your deref chain. More realistic example is: ptr->a.b->c.d which is really two deref chains, first starting at a var, second starting at an ssa ptr (which I think realistically ends up needing to be a fat pointer to deal w/ cl's multiple address spaces[1]), with an intermediate load_global or load_shared intrinsic in between. Anyways, don't want to derail the conversion to deref instructions too much, but I do think we need something different for "var" vs "ptr" (and the nice thing about deref chains is this should be easier to add) BR, -R [1] kinda a different topic.. short version is I'm leaning towards a nir_deref_instr_pointer taking a two component vector as it's src so it can be lowered to an if/else chain to deal with different address spaces, and then let opt passes clean things up so driver ends up with either load/store_global or load/store_local, etc > >> >>> 3. Modify nir_lower_io to handle new-style derefs, especially for >>> shared variables (i.e. KHR_variable_pointers for anv). We might have >>> to modify a few other passes, too. >>> 4. 
Add the required deref lowering passes to all drivers. >>> 5. Rewrite glsl_to_nir and spirv_to_nir to emit the new-style derefs. >>> At the very least, we should be using this to implement the shared >>> variable bits of KHR_variable_pointers. If we add stride/offset >>> annotations to nir_deref_instr for UBO's and SSBO's, then we might >>> also be able to get rid of the vtn_deref stuff entirely (although I'm >>> not sure if that should be a goal right now). >> >> I think I might try to prototype something where we convert vtn over >> to new-style deref instructions, plus a pass to lower to old style >> deref chains. It partly comes down to how quickly I can finish a >> couple other things, and how much I can't sleep on a long-ass flight. >> (I guess even if throw-away, if it gives some idea of what to do or >> what not to do it might be useful?) >> >> Anyways, as far as decoupling this from backend drivers, I think a >> nir_intr_get_var(intr, n) instruction to replace open coded >> intr->variables[0]->var could go a long way. (In the new world this >> would follow ssa links to previous deref instruction to find the >> nir_deref_instruction_var.) I'll try typing this up in a few minutes. >> >>> At this point
Re: [Mesa-dev] FLAG-DAY: NIR derefs
On Wed, Mar 14, 2018 at 5:05 PM, Rob Clark wrote: > On Wed, Mar 14, 2018 at 4:58 PM, Connor Abbott wrote: >> FWIW, the way I imagined doing this was something like: >> >> 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just >> make everything assert if the base deref isn't a nir_deref_var. This >> will be a bit of a flag-day, but also very mechanical. It'll also help >> us catch cases where we don't handle new-style derefs later. >> 2. Add a pass to flatten nir_deref_type_pointer into >> nir_deref_type_var if possible (i.e. if there's a clear chain up to >> the base variable without any phi nodes or whatever). This should >> always be possible for GLSL, as well as SPIR-V unless >> KHR_variable_pointers is enabled. We'll use this to avoid too much >> churn in drivers, passes that haven't been updated, etc. We might also >> want a pass to do the opposite, for converting passes where we don't >> want to have codepaths for both forms at once. > > btw, does it seem reasonable to assert that deref instruction src's > are *always* in SSA form? That seems reasonable to me since they will > be mostly lowered away before the driver sees them (and I think makes > some of the operation on them easier), and I can't think of any way > for them *not* to be SSA (since they aren't real instructions). I think so... as long as you don't lower locals to regs before lowering everything to explicit address arithmetic. Although, with the physical memory model, it's just another source like any other so I'm not sure if there's a point. > > If so, my rough thoughts are a deref instruction chain (formed by ssa > links to previous deref instruction) either start w/ > nir_deref_instr_pointer or nir_deref_instruction_var instructions at > the head of the list (to start, I guess you could ignore adding the > nir_deref_instr_pointer instruction and I could add that for > clover/spirv work). 
Followed by N links of struct/array deref_link > instructions that have two ssa src's (one that is previous deref > instruction and one that is array or struct member offset) Why would you need a separate nir_deref_instr_pointer? Do you want to put information like what type of pointer it is in there? Maybe we could just make that part of every nir_deref_instr instead? > >> 3. Modify nir_lower_io to handle new-style derefs, especially for >> shared variables (i.e. KHR_variable_pointers for anv). We might have >> to modify a few other passes, too. >> 4. Add the required deref lowering passes to all drivers. >> 5. Rewrite glsl_to_nir and spirv_to_nir to emit the new-style derefs. >> At the very least, we should be using this to implement the shared >> variable bits of KHR_variable_pointers. If we add stride/offset >> annotations to nir_deref_instr for UBO's and SSBO's, then we might >> also be able to get rid of the vtn_deref stuff entirely (although I'm >> not sure if that should be a goal right now). > > I think I might try to prototype something where we convert vtn over > to new-style deref instructions, plus a pass to lower to old style > deref chains. It partly comes down to how quickly I can finish a > couple other things, and how much I can't sleep on a long-ass flight. > (I guess even if throw-away, if it gives some idea of what to do or > what not to do it might be useful?) > > Anyways, as far as decoupling this from backend drivers, I think a > nir_intr_get_var(intr, n) instruction to replace open coded > intr->variables[0]->var could go a long way. (In the new world this > would follow ssa links to previous deref instruction to find the > nir_deref_instruction_var.) I'll try typing this up in a few minutes. > >> At this point, we can fix things up and move everything else over to >> new-style derefs at our leisure. 
Also, it should now be pretty >> straightforward to add support for shared variable pointers to radv >> without lowering everything to offsets up-front, which is nice. >> >> Connor >> >> >> On Wed, Mar 14, 2018 at 2:32 PM, Jason Ekstrand wrote: >>> All, >>> >>> Connor and I along with several others have been discussing for a while >>> changing the way NIR dereferences work. In particular, adding a new >>> nir_deref_instr type where the first one in the chain takes a variable and >>> is followed by a series of instructions which take another deref instruction >>> and do an array or structure dereference on it. >>> >>> Much of the motivation for this is some of the upcoming SPIR-V stuff where >>> we have more real pointers and deref chains don't really work anymore. It >>> will also allow for things such as CSE of common derefs which could make >>> analysis easier. This is similar to what LLVM does and it's working very >>> well for them. >>> >>> The reason for this e-mail is that this is going to be a flag-day change. >>> We've been talking about it for a while but this is going to be a major and >>> fairly painful change in the short term so no one has actually done it. >>> It's time we finally j
Re: [Mesa-dev] [PATCH mesa 1/2] Add processor topology calculation implementation for Darwin/OSX targets.
Subject should have a swr prefix or similar. On Wed, Mar 14, 2018 at 4:19 PM, Jeremy Huddleston Sequoia wrote: > From: Apple SWE Explain? > > The implementation for bootstrapping SWR on Darwin targets is based on the > Linux version. > Instead of reading the output of /proc/cpuinfo, sysctlbyname is used to > determine the > physical identifiers, processor identifiers, core counts and thread-processor > affinities. > > With this patch, it is possible to use SWR as an alternate renderer on OSX to > softpipe and > llvmpipe. All of these look like they're too long to fit in 80 columns in git show. > Reviewed-by: Jeremy Huddleston Sequoia I'm guessing you're just pushing code someone else wrote... > Signed-off-by: Jeremy Huddleston Sequoia We don't have a DCO, so Signed-off-by has no meaning. Worse, in the case we added a DCO in the future, all of the stupid Signed-off-by's people have been cargo culting for years would confuse everything. Please stop doing it. Please don't push code to maintained drivers without going through the mailing list. I feel like I shouldn't have to say that. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] configure: remove unneeded XCB_REQUIRED
On 15 March 2018 at 09:29, Andres Gomez wrote: > It is only used for dri3 and xcb-dri3 and xcb-present were already > mandating the minimal version, which is incoherent with the xcb one. We are in the middle of a thread discussing this area already, probably don't need this patch at this stage. Let's solve the problem first. Dave. > > This also makes configure.ac more homogeneous with other modules, like > dri2 or glx, which also need xcb but get the minimal version from > xcb-dri2 or xcb-glx, respectively. > > Cc: Emil Velikov > Cc: Eric Engestrom > Signed-off-by: Andres Gomez > --- > configure.ac | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/configure.ac b/configure.ac > index 621dc328d90..4392a427699 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -90,7 +90,6 @@ LIBVA_REQUIRED=0.38.0 > VDPAU_REQUIRED=1.1 > WAYLAND_REQUIRED=1.11 > WAYLAND_PROTOCOLS_REQUIRED=1.8 > -XCB_REQUIRED=1.9.3 > XCBDRI2_REQUIRED=1.8 > XCBDRI3_REQUIRED=1.13 > XCBGLX_REQUIRED=1.8.1 > @@ -1850,7 +1849,7 @@ fi > if test x"$enable_dri3" = xyes; then > DEFINES="$DEFINES -DHAVE_DRI3" > > -dri3_modules="x11-xcb xcb >= $XCB_REQUIRED xcb-dri3 >= $XCBDRI3_REQUIRED > xcb-xfixes xcb-present >= $XCBPRESENT_REQUIRED xcb-sync xshmfence >= > $XSHMFENCE_REQUIRED" > +dri3_modules="x11-xcb xcb xcb-dri3 >= $XCBDRI3_REQUIRED xcb-xfixes > xcb-present >= $XCBPRESENT_REQUIRED xcb-sync xshmfence >= $XSHMFENCE_REQUIRED" > PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules]) > fi > > -- > 2.16.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH mesa 2/2] sched.h needs to be imported on Darwin/OSX targets.
From: Apple SWE sched_yield is used but the include reference on Darwin is missing. This patch conditionally guards on Darwin/OSX to import sched.h first. Reviewed-by: Jeremy Huddleston Sequoia Signed-off-by: Jeremy Huddleston Sequoia --- src/gallium/drivers/swr/swr_fence.cpp | 4 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/swr/swr_fence.cpp b/src/gallium/drivers/swr/swr_fence.cpp index 3005eb9aaa..b05ac8cec0 100644 --- a/src/gallium/drivers/swr/swr_fence.cpp +++ b/src/gallium/drivers/swr/swr_fence.cpp @@ -29,6 +29,10 @@ #include "swr_screen.h" #include "swr_fence.h" +#ifdef __APPLE__ +#include <sched.h> +#endif + #if defined(PIPE_CC_MSVC) // portable thread yield #define sched_yield SwitchToThread #endif -- 2.16.1 (Apple Git-102) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH mesa 1/2] Add processor topology calculation implementation for Darwin/OSX targets.
From: Apple SWE The implementation for bootstrapping SWR on Darwin targets is based on the Linux version. Instead of reading the output of /proc/cpuinfo, sysctlbyname is used to determine the physical identifiers, processor identifiers, core counts and thread-processor affinities. With this patch, it is possible to use SWR as an alternate renderer on OSX to softpipe and llvmpipe. Reviewed-by: Jeremy Huddleston Sequoia Signed-off-by: Jeremy Huddleston Sequoia --- .../drivers/swr/rasterizer/core/threads.cpp| 56 +- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 4d79168d2d..3eb20abcbf 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -36,6 +36,11 @@ #include #endif +#ifdef __APPLE__ +#include +#include +#endif + #include "common/os.h" #include "context.h" #include "frontend.h" @@ -219,6 +224,56 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread #elif defined(__APPLE__) +auto numProcessors = 0; +auto numCores = 0; +auto numPhysicalIds = 0; + +int value; +size_t size = sizeof(value); + +int result = sysctlbyname("hw.packages", &value, &size, NULL, 0); +SWR_ASSERT(result == 0); +numPhysicalIds = value; + +result = sysctlbyname("hw.logicalcpu", &value, &size, NULL, 0); +SWR_ASSERT(result == 0); +numProcessors = value; + +result = sysctlbyname("hw.physicalcpu", &value, &size, NULL, 0); +SWR_ASSERT(result == 0); +numCores = value; + +out_nodes.resize(numPhysicalIds); + +for (auto physId = 0; physId < numPhysicalIds; ++physId) +{ +auto &numaNode = out_nodes[physId]; +auto procId = 0; + +numaNode.cores.resize(numCores); + +while (procId < numProcessors) +{ +for (auto coreId = 0; coreId < numaNode.cores.size(); ++coreId, ++procId) +{ +auto &core = numaNode.cores[coreId]; + +core.procGroup = coreId; +core.threadIds.push_back(procId); +} +} +} + 
+out_numThreadsPerProcGroup = 0; + +for (auto &node : out_nodes) +{ +for (auto &core : node.cores) +{ +out_numThreadsPerProcGroup += core.threadIds.size(); +} +} + #else #error Unsupported platform @@ -253,7 +308,6 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread } } - void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false) { // Only bind threads when MAX_WORKER_THREADS isn't set. -- 2.16.1 (Apple Git-102) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] configure: remove unneeded XCB_REQUIRED
It is only used for dri3 and xcb-dri3 and xcb-present were already mandating the minimal version, which is incoherent with the xcb one. This also makes configure.ac more homogeneous with other modules, like dri2 or glx, which also need xcb but get the minimal version from xcb-dri2 or xcb-glx, respectively. Cc: Emil Velikov Cc: Eric Engestrom Signed-off-by: Andres Gomez --- configure.ac | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 621dc328d90..4392a427699 100644 --- a/configure.ac +++ b/configure.ac @@ -90,7 +90,6 @@ LIBVA_REQUIRED=0.38.0 VDPAU_REQUIRED=1.1 WAYLAND_REQUIRED=1.11 WAYLAND_PROTOCOLS_REQUIRED=1.8 -XCB_REQUIRED=1.9.3 XCBDRI2_REQUIRED=1.8 XCBDRI3_REQUIRED=1.13 XCBGLX_REQUIRED=1.8.1 @@ -1850,7 +1849,7 @@ fi if test x"$enable_dri3" = xyes; then DEFINES="$DEFINES -DHAVE_DRI3" -dri3_modules="x11-xcb xcb >= $XCB_REQUIRED xcb-dri3 >= $XCBDRI3_REQUIRED xcb-xfixes xcb-present >= $XCBPRESENT_REQUIRED xcb-sync xshmfence >= $XSHMFENCE_REQUIRED" +dri3_modules="x11-xcb xcb xcb-dri3 >= $XCBDRI3_REQUIRED xcb-xfixes xcb-present >= $XCBPRESENT_REQUIRED xcb-sync xshmfence >= $XSHMFENCE_REQUIRED" PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules]) fi -- 2.16.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] anv/pipeline: don't pass constant view index in multiview
On Tue, Feb 27, 2018 at 12:13:52PM -0800, Jason Ekstrand wrote: > > diff --git a/src/intel/vulkan/anv_nir_lower_multiview.c > > b/src/intel/vulkan/anv_nir_lower_multiview.c > > index d2aefdee62..365a70d757 100644 > > --- a/src/intel/vulkan/anv_nir_lower_multiview.c > > +++ b/src/intel/vulkan/anv_nir_lower_multiview.c > > @@ -72,7 +72,8 @@ build_view_index(struct lower_multiview_state *state) > >b->cursor = nir_before_block(nir_start_block(b->impl)); > > > >assert(state->view_mask != 0); > > - if (0 && _mesa_bitcount(state->view_mask) == 1) { > > + if (_mesa_bitcount(state->view_mask) == 1) { > > > > Yes, I think it's probably safe to turn this on now. Originally, I had it > commented out because I was afraid of not getting enough test coverage. > For all I know, we still aren't getting enough test coverage but I think > we've proven by now that the calculations below work. There are now CTS tests covering multiview, including cases with a single view set in the view_mask. > > + /* Unless there is only one possible view index (that would be set > > + * directly), pass it to the next stage. */ > > > > With multi-line comments, we usually put the "*/" on its own line. Just sent v2 fixing this. Thanks, Caio ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105442] Hang when running nine ff lighting shader with radeonsi
https://bugs.freedesktop.org/show_bug.cgi?id=105442 Axel Davy changed: What|Removed |Added Summary|Hang when running nine ff |Hang when running nine ff |lighting shader |lighting shader with ||radeonsi --- Comment #1 from Axel Davy --- I tested with a slightly older version of llvm I had locally that was based on llvm git from June, and the same behaviour occurs. It could be a radeonsi bug introduced in the llvm asm generation or an llvm bug uncovered by a change in that asm. I filed a bug on the llvm side as it's likely involved: https://bugs.llvm.org/show_bug.cgi?id=36704 -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/2] anv/pipeline: use less instructions for multiview
The view_index is encoded in the remainder of dividing instance id by the number of views in the view mask (n). In the general case (handled by the else clause), there is a need to map from 0..n-1 into the number of the view being masked. For that a map is encoded. In the case only the first n bits in the mask are set, the mapping is trivial, 0..n-1 already represent what view is being referred to. That case was in the original patch that added anv_nir_lower_multiview.c but disabled. --- src/intel/vulkan/anv_nir_lower_multiview.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_nir_lower_multiview.c b/src/intel/vulkan/anv_nir_lower_multiview.c index 88e6f9af87..d2aefdee62 100644 --- a/src/intel/vulkan/anv_nir_lower_multiview.c +++ b/src/intel/vulkan/anv_nir_lower_multiview.c @@ -86,7 +86,7 @@ build_view_index(struct lower_multiview_state *state) nir_umod(b, nir_load_instance_id(b), nir_imm_int(b, _mesa_bitcount(state->view_mask))); - if (0 && util_is_power_of_two(state->view_mask + 1)) { + if (util_is_power_of_two(state->view_mask + 1)) { /* If we have a full view mask, then compacted is what we want */ state->view_index = compacted; } else { -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/2] anv/pipeline: don't pass constant view index in multiview
If view mask has only one bit set, view index is effectively a constant, so doesn't need to be passed to the next stages, just always set it. Part of this was in the original patch that added anv_nir_lower_multiview.c but disabled. v2: Fixed comment style. --- src/intel/vulkan/anv_nir_lower_multiview.c | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/intel/vulkan/anv_nir_lower_multiview.c b/src/intel/vulkan/anv_nir_lower_multiview.c index d2aefdee62..a81210adbc 100644 --- a/src/intel/vulkan/anv_nir_lower_multiview.c +++ b/src/intel/vulkan/anv_nir_lower_multiview.c @@ -72,7 +72,8 @@ build_view_index(struct lower_multiview_state *state) b->cursor = nir_before_block(nir_start_block(b->impl)); assert(state->view_mask != 0); - if (0 && _mesa_bitcount(state->view_mask) == 1) { + if (_mesa_bitcount(state->view_mask) == 1) { + /* Set the view index directly. */ state->view_index = nir_imm_int(b, ffs(state->view_mask) - 1); } else if (state->builder.shader->info.stage == MESA_SHADER_VERTEX) { /* We only support 16 viewports */ @@ -210,11 +211,16 @@ anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask) assert(view_index->parent_instr->block == nir_start_block(entrypoint)); b->cursor = nir_after_instr(view_index->parent_instr); - nir_variable *view_index_out = - nir_variable_create(shader, nir_var_shader_out, - glsl_int_type(), "view index"); - view_index_out->data.location = VARYING_SLOT_VIEW_INDEX; - nir_store_var(b, view_index_out, view_index, 0x1); + /* Unless there is only one possible view index (that would be set + * directly), pass it to the next stage. 
+ */ + if (_mesa_bitcount(state.view_mask) != 1) { + nir_variable *view_index_out = +nir_variable_create(shader, nir_var_shader_out, +glsl_int_type(), "view index"); + view_index_out->data.location = VARYING_SLOT_VIEW_INDEX; + nir_store_var(b, view_index_out, view_index, 0x1); + } nir_variable *layer_id_out = nir_variable_create(shader, nir_var_shader_out, -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105442] Hang when running nine ff lighting shader
https://bugs.freedesktop.org/show_bug.cgi?id=105442 Axel Davy changed: What|Removed |Added QA Contact|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop. |.org|org Assignee|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop. |.org|org -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id
I see. Because user will be putting PCI_ID instead if a specific device variant is required. I shouldn't have been confused in the first place :-). Thanks, On Wed, Mar 14, 2018 at 04:03:05PM -0700, Kenneth Graunke wrote: > On Wednesday, March 14, 2018 3:43:18 PM PDT Dongwon Kim wrote: > > Yeah, thought about that (checking name then -> try to parse it as PCI-ID) > > but didn't implement it because it won't work when there are multiple > > different PCI-ID bound to same 'name' (e.g. want to use a specific PCI-ID > > hsw). But wait a minite I think the opposite way (check if it's PCI-ID > > first) should cover that case > > > > I will upload v2 with this change shortly. > > It should work either way... 'hsw' would pick some arbitrary Haswell > PCI ID (if you don't care which one), and 0xD26 would pick a specific > Haswell PCI ID. > > --Ken ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] FLAG-DAY: NIR derefs
On Wed, Mar 14, 2018 at 4:58 PM, Connor Abbott wrote: > FWIW, the way I imagined doing this was something like: > > 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just > make everything assert if the base deref isn't a nir_deref_var. This > will be a bit of a flag-day, but also very mechanical. It'll also help > us catch cases where we don't handle new-style derefs later. > 2. Add a pass to flatten nir_deref_type_pointer into > nir_deref_type_var if possible (i.e. if there's a clear chain up to > the base variable without any phi nodes or whatever). This should > always be possible for GLSL, as well as SPIR-V unless > KHR_variable_pointers is enabled. We'll use this to avoid too much > churn in drivers, passes that haven't been updated, etc. We might also > want a pass to do the opposite, for converting passes where we don't > want to have codepaths for both forms at once. btw, does it seem reasonable to assert that deref instruction src's are *always* in SSA form? That seems reasonable to me since they will be mostly lowered away before the driver sees them (and I think makes some of the operation on them easier), and I can't think of any way for them *not* to be SSA (since they aren't real instructions). If so, my rough thoughts are a deref instruction chain (formed by ssa links to previous deref instruction) either start w/ nir_deref_instr_pointer or nir_deref_instruction_var instructions at the head of the list (to start, I guess you could ignore adding the nir_deref_instr_pointer instruction and I could add that for clover/spirv work). Followed by N links of struct/array deref_link instructions that have two ssa src's (one that is previous deref instruction and one that is array or struct member offset) > 3. Modify nir_lower_io to handle new-style derefs, especially for > shared variables (i.e. KHR_variable_pointers for anv). We might have > to modify a few other passes, too. > 4. Add the required deref lowering passes to all drivers. > 5. 
Rewrite glsl_to_nir and spirv_to_nir to emit the new-style derefs. > At the very least, we should be using this to implement the shared > variable bits of KHR_variable_pointers. If we add stride/offset > annotations to nir_deref_instr for UBO's and SSBO's, then we might > also be able to get rid of the vtn_deref stuff entirely (although I'm > not sure if that should be a goal right now). I think I might try to prototype something where we convert vtn over to new-style deref instructions, plus a pass to lower to old style deref chains. It partly comes down to how quickly I can finish a couple other things, and how much I can't sleep on a long-ass flight. (I guess even if throw-away, if it gives some idea of what to do or what not to do it might be useful?) Anyways, as far as decoupling this from backend drivers, I think a nir_intr_get_var(intr, n) instruction to replace open coded intr->variables[0]->var could go a long way. (In the new world this would follow ssa links to previous deref instruction to find the nir_deref_instruction_var.) I'll try typing this up in a few minutes. > At this point, we can fix things up and move everything else over to > new-style derefs at our leisure. Also, it should now be pretty > straightforward to add support for shared variable pointers to radv > without lowering everything to offsets up-front, which is nice. > > Connor > > > On Wed, Mar 14, 2018 at 2:32 PM, Jason Ekstrand wrote: >> All, >> >> Connor and I along with several others have been discussing for a while >> changing the way NIR dereferences work. In particular, adding a new >> nir_deref_instr type where the first one in the chain takes a variable and >> is followed by a series of instructions which take another deref instruction >> and do an array or structure dereference on it. >> >> Much of the motivation for this is some of the upcoming SPIR-V stuff where >> we have more real pointers and deref chains don't really work anymore. 
It >> will also allow for things such as CSE of common derefs which could make >> analysis easier. This is similar to what LLVM does and it's working very >> well for them. >> >> The reason for this e-mail is that this is going to be a flag-day change. >> We've been talking about it for a while but this is going to be a major and >> fairly painful change in the short term so no one has actually done it. >> It's time we finally just suck it up and make it happen. While we will try >> to make the change as incrementally and reviewably as possible but there is >> a real limit as to what is possible here. My plan is to start cracking away >> at this on Monday and hopefully have something working for i965/anv by the >> end of the week or maybe some time the week after. If anyone has something >> to say in opposition, please speak up now and not after I've spent a week >> straight frantically hacking on NIR. >> >> I would like everyone to be respectful of the fact that this will be a major >> change and very pai
Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id
On Wednesday, March 14, 2018 3:43:18 PM PDT Dongwon Kim wrote: > Yeah, thought about that (checking name then -> try to parse it as PCI-ID) > but didn't implement it because it won't work when there are multiple > different PCI-ID bound to same 'name' (e.g. want to use a specific PCI-ID > hsw). But wait a minite I think the opposite way (check if it's PCI-ID > first) should cover that case > > I will upload v2 with this change shortly. It should work either way... 'hsw' would pick some arbitrary Haswell PCI ID (if you don't care which one), and 0xD26 would pick a specific Haswell PCI ID. --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH shaderdb 3/3] run: shader program file created via GetProgramBinary (v3)
Thanks for the review, Ken I agree on most of your proposals. I will upload another version shortly. On Wed, Mar 14, 2018 at 03:10:25PM -0700, Kenneth Graunke wrote: > On Monday, February 26, 2018 2:17:05 PM PDT Dongwon Kim wrote: > > extraction of linked binary program to a file using glGetProgramBinary. > > This file is intended to be loaded by glProgramBinary in the graphic > > application running on the target system. > > > > To enable this feature, a new option '--bin' has to be passed to the > > program execution. > > > > v2: 1. define MAX_LOG_LEN and use it as the size of gl log > > 2. define MAX_PROG_SIZE and use it as the max size of extracted > >shader_program > > 3. out_file is now pointer allocated by strdup for the file name > > > > v3: 1. automatically using original shader test file's name + ".bin" > >as a filename for program binary - better way to cover the case > >with batch compilation of many shader test files in the same > >directory > > 2. remove --out= since it is now unnecessary (due to v3-1.) > >to provide custom file name. Instead, option, "--bin", which is > >basically a flag that enables getting program binary as a file. > > 3. 
Now it tries to get the length of binary by reading program's > >GL_PROGRAM_BINARY_LENGTH_OES parameter > > > > Signed-off-by: Dongwon Kim > > --- > > run.c | 68 > > +++ > > 1 file changed, 64 insertions(+), 4 deletions(-) > > > > diff --git a/run.c b/run.c > > index d066567..bbab5d9 100644 > > --- a/run.c > > +++ b/run.c > > @@ -52,6 +52,9 @@ > > > > #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) > > > > +#define MAX_LOG_LEN 4096 > > +#define MAX_PROG_SIZE (10*1024*1024) /* maximum 10MB for shader program */ > > + > > struct context_info { > > char *extension_string; > > int extension_string_len; > > @@ -358,18 +361,20 @@ const struct platform platforms[] = { > > enum > > { > > PCI_ID_OVERRIDE_OPTION = CHAR_MAX + 1, > > +LOADABLE_PROGRAM_BINARY_OPTION, > > }; > > > > const struct option const long_options[] = > > { > > {"pciid", required_argument, NULL, PCI_ID_OVERRIDE_OPTION}, > > +{"bin", no_argument, NULL, LOADABLE_PROGRAM_BINARY_OPTION}, > > This sounds like we're loading binaries. Can we call it > GENERATE_PROGRAM_BINARY_OPTION instead? Yeah, I will change this. > > > {NULL, 0, NULL, 0} > > }; > > > > void print_usage(const char *prog_name) > > { > > fprintf(stderr, > > -"Usage: %s [-d ] [-j ] [-o ] [-p > > ] [--pciid=] > *.shader_test files>\n", > > +"Usage: %s [-d ] [-j ] [-o ] [-p > > ] [--pciid=] > *.shader_test files>\n", > > prog_name); > > } > > > > @@ -450,6 +455,7 @@ main(int argc, char **argv) > > int opt; > > bool platf_overridden = 0; > > bool pci_id_overridden = 0; > > +bool enable_prog_bin = 0; > > Maybe generate_prog_bin here as well. sure. 
> > > > > max_threads = omp_get_max_threads(); > > > > @@ -518,6 +524,9 @@ main(int argc, char **argv) > > setenv("INTEL_DEVID_OVERRIDE", optarg, 1); > > pci_id_overridden = 1; > > break; > > +case LOADABLE_PROGRAM_BINARY_OPTION: > > +enable_prog_bin = 1; > > +break; > > default: > > fprintf(stderr, "Unknown option: %x\n", opt); > > print_usage(argv[0]); > > @@ -858,18 +867,18 @@ main(int argc, char **argv) > > } > > } else if (type == TYPE_CORE || type == TYPE_COMPAT || type == > > TYPE_ES) { > > GLuint prog = glCreateProgram(); > > +GLint param; > > So...putting this here means that you're not going to support generating > program binaries for SSO-based programs. That seems a bit unfortunate... > I can consider this later. > > > > for (unsigned i = 0; i < num_shaders; i++) { > > GLuint s = glCreateShader(shader[i].type); > > glShaderSource(s, 1, &shader[i].text, > > &shader[i].length); > > glCompileShader(s); > > > > -GLint param; > > glGetShaderiv(s, GL_COMPILE_STATUS, ¶m); > > if (unlikely(!param)) { > > -GLchar log[4096]; > > +GLchar log[MAX_LOG_LEN]; > > GLsizei length; > > -glGetShaderInfoLog(s, 4096, &length, log); > > +glGetShaderInfoLog(s, sizeof(log), &length, log); > > It would be nice to make a helper function for getting the info log and > printing an error, since you've now got it twice. Should probably be a > separate patch (and include the MAX_LOG_LEN change). > I will work on it (another patch.) > > > > fprintf(stderr, "ERROR: %s fa
Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id
Yeah, I am using "intel_run" script that lets "run" use intel_stub layer instead. Pretty useful.. On Wed, Mar 14, 2018 at 02:54:08PM -0700, Kenneth Graunke wrote: > On Monday, February 12, 2018 5:26:15 PM PDT Dongwon Kim wrote: > > Add a new option, '--pciid' to override a pci id of the target arch > > to support cross-architecture shader compilation. Not like "-p" option, > > it is for accepting any GFX devices supported by the driver. > > > > Setting both "-p" and "--pciid" is blocked to avoid conflict. > > > > Signed-off-by: Dongwon Kim > > --- > > run.c | 44 ++-- > > 1 file changed, 42 insertions(+), 2 deletions(-) > > Oh, another thing I forgot to mention - you might find intel_run and > intel_stub.c to be useful. They allow you to emulate any Intel GPU > for purposes of running shader-db, without needing to have any graphics > hardware present in your system. This can be useful when building on > Xeon build servers or the like... ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id
Yeah, thought about that (checking name then -> try to parse it as PCI-ID) but didn't implement it because it won't work when there are multiple different PCI-ID bound to same 'name' (e.g. want to use a specific PCI-ID hsw). But wait a minute, I think the opposite way (check if it's PCI-ID first) should cover that case. I will upload v2 with this change shortly. On Wed, Mar 14, 2018 at 02:52:36PM -0700, Kenneth Graunke wrote: > On Monday, February 12, 2018 5:26:15 PM PDT Dongwon Kim wrote: > > Add a new option, '--pciid' to override a pci id of the target arch > > to support cross-architecture shader compilation. Not like "-p" option, > > it is for accepting any GFX devices supported by the driver. > > > > Setting both "-p" and "--pciid" is blocked to avoid conflict. > > > > Signed-off-by: Dongwon Kim > > --- > > run.c | 44 ++-- > > 1 file changed, 42 insertions(+), 2 deletions(-) > > Hi Dongwon, > > It looks like this does the exact same thing as -p, but it accepts > arbitrary numerical PCI IDs instead of a platform name. IMHO, I think > we should just make -p try parsing it as a number if it doesn't match > any of the names. Then either -p bdw or -p 0x1616 would work. > > It seems much less complicated that way. > > --Ken ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Releasing 18.0
Hi Emil, all, I think we've closed the last of the Mesa 18.0 blocker bugs. It looks like there are some patches nominated for the 18.0 branch still (fixing some of those issues), but assuming things are merged, I think we're ready to release. I checked with Mark and Jason on IRC and they seemed fine with shipping. Thanks! --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH shader-db 4/4] run: handling binding of attribute variable name
On Friday, March 9, 2018 2:28:36 PM PDT Dongwon Kim wrote: > Optional binding of variables can be processed before linking shader > objects for creating shader program. It is activated by adding lines > with a keyword "BindAttribLoc" followed by name and index as, > > "BindAttribLoc name_str1 " > > For example, > > [require] > .. > BindAttrbLoc vertex 1 > BindAttrbLoc coord 2 > BindAttrbLoc col 3 > > This makes the shader-db run > > glBindAttribLocation(p, 1, "vertex"); > glBindAttribLocation(p, 2, "coord"); > glBindAttribLocation(p, 3, "col"); > > before glLinkProgram() to include these binding info in binary shader > program. > > Signed-off-by: Dongwon Kim Matt, do you have an opinion on this? This seems like the sort of commands that would normally go in the [test] block, rather than the [require] block. But it looks like shader_runner doesn't have any syntax for glBindAttribLocation today. It's definitely a useful thing to have if we're going to use run.c to produce shader binaries for ARB_get_program_binary... 
> --- > run.c | 79 > +++ > 1 file changed, 79 insertions(+) > > diff --git a/run.c b/run.c > index bbab5d9..fe2a97a 100644 > --- a/run.c > +++ b/run.c > @@ -76,6 +76,12 @@ struct shader { > int type; > }; > > +struct binding_var { > +char *name; > +GLint index; > +struct binding_var *next; > +}; > + > static bool > extension_in_string(const char *haystack, const char *needle) > { > @@ -105,6 +111,10 @@ extension_in_string(const char *haystack, const char > *needle) > return false; > } > > +#define SKIP_SPACES(str) while (*(str) == ' ') str++ > + > +struct binding_var binding_head = {"NULL", -1, NULL}; > + > static struct shader * > get_shaders(const struct context_info *core, const struct context_info > *compat, > const struct context_info *es, > @@ -120,6 +130,7 @@ get_shaders(const struct context_info *core, const struct > context_info *compat, > static const char *fp_req = "\nGL_ARB_fragment_program"; > static const char *vp_req = "\nGL_ARB_vertex_program"; > static const char *sso_req = "\nSSO ENABLED"; > +static const char *binding = "\nBindAttribLoc"; > static const char *gs = "geometry shader]\n"; > static const char *fs = "fragment "; > static const char *vs = "vertex "; > @@ -186,11 +197,13 @@ get_shaders(const struct context_info *core, const > struct context_info *compat, > const struct context_info *info = *type == TYPE_CORE ? 
core : compat; > > const char *extension_text = text; > + > while ((extension_text = memmem(extension_text, end_text - > extension_text, > "\nGL_", strlen("\nGL_"))) != NULL) { > extension_text += 1; > const char *newline = memchr(extension_text, '\n', > end_text - extension_text); > + > if (memmem(info->extension_string, info->extension_string_len, > extension_text, newline - extension_text) == NULL) { > fprintf(stderr, "SKIP: %s requires unavailable extension %.*s\n", > @@ -202,6 +215,62 @@ get_shaders(const struct context_info *core, const > struct context_info *compat, > } > } > > +/* process binding */ > +struct binding_var *binding_prev = &binding_head; > +const char *pre_binding_text = text; > + > +while ((pre_binding_text = memmem(pre_binding_text, end_text - > pre_binding_text, > + binding, strlen(binding))) != NULL) { > +pre_binding_text += strlen(binding); > + > +const char *newline = memchr(pre_binding_text, '\n', end_text - > pre_binding_text); > + > +SKIP_SPACES(pre_binding_text); > + > +char *endword = memchr(pre_binding_text, ' ', newline - > pre_binding_text); > + > +/* if there's no more space in the same line */ > +if (!endword) { > +fprintf(stderr, "SKIP: can't find attr index for this > binding\n"); > +continue; > +} > + > +char *binding_name = (char *)calloc(1, endword - pre_binding_text + > 1); > + > +strncpy(binding_name, pre_binding_text, endword - pre_binding_text); > + > +pre_binding_text = endword; > + > +SKIP_SPACES(pre_binding_text); > +if (*pre_binding_text == '\n') { > +fprintf(stderr, "SKIP: can't find attr variable name for this > binding\n"); > +continue; > +} > + > +endword = memchr(pre_binding_text, ' ', newline - pre_binding_text); > + > +if (!endword) > +endword = (char *)newline; > + > +char *index_string = calloc(1, endword - pre_binding_text + 1); > +strncpy(index_string, pre_binding_text, endword - pre_binding_text); > + > +struct binding_var *binding_new = malloc(sizeof(struct binding_var)); > + > +binding_new->index =
Re: [Mesa-dev] [PATCH shaderdb 3/3] run: shader program file created via GetProgramBinary (v3)
On Monday, February 26, 2018 2:17:05 PM PDT Dongwon Kim wrote: > extraction of linked binary program to a file using glGetProgramBinary. > This file is intended to be loaded by glProgramBinary in the graphic > application running on the target system. > > To enable this feature, a new option '--bin' has to be passed to the > program execution. > > v2: 1. define MAX_LOG_LEN and use it as the size of gl log > 2. define MAX_PROG_SIZE and use it as the max size of extracted >shader_program > 3. out_file is now pointer allocated by strdup for the file name > > v3: 1. automatically using original shader test file's name + ".bin" >as a filename for program binary - better way to cover the case >with batch compilation of many shader test files in the same >directory > 2. remove --out= since it is now unnecessary (due to v3-1.) >to provide custom file name. Instead, option, "--bin", which is >basically a flag that enables getting program binary as a file. > 3. Now it tries to get the length of binary by reading program's >GL_PROGRAM_BINARY_LENGTH_OES parameter > > Signed-off-by: Dongwon Kim > --- > run.c | 68 > +++ > 1 file changed, 64 insertions(+), 4 deletions(-) > > diff --git a/run.c b/run.c > index d066567..bbab5d9 100644 > --- a/run.c > +++ b/run.c > @@ -52,6 +52,9 @@ > > #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) > > +#define MAX_LOG_LEN 4096 > +#define MAX_PROG_SIZE (10*1024*1024) /* maximum 10MB for shader program */ > + > struct context_info { > char *extension_string; > int extension_string_len; > @@ -358,18 +361,20 @@ const struct platform platforms[] = { > enum > { > PCI_ID_OVERRIDE_OPTION = CHAR_MAX + 1, > +LOADABLE_PROGRAM_BINARY_OPTION, > }; > > const struct option const long_options[] = > { > {"pciid", required_argument, NULL, PCI_ID_OVERRIDE_OPTION}, > +{"bin", no_argument, NULL, LOADABLE_PROGRAM_BINARY_OPTION}, This sounds like we're loading binaries. Can we call it GENERATE_PROGRAM_BINARY_OPTION instead? 
> {NULL, 0, NULL, 0} > }; > > void print_usage(const char *prog_name) > { > fprintf(stderr, > -"Usage: %s [-d ] [-j ] [-o ] [-p > ] [--pciid=] *.shader_test files>\n", > +"Usage: %s [-d ] [-j ] [-o ] [-p > ] [--pciid=] *.shader_test files>\n", > prog_name); > } > > @@ -450,6 +455,7 @@ main(int argc, char **argv) > int opt; > bool platf_overridden = 0; > bool pci_id_overridden = 0; > +bool enable_prog_bin = 0; Maybe generate_prog_bin here as well. > > max_threads = omp_get_max_threads(); > > @@ -518,6 +524,9 @@ main(int argc, char **argv) > setenv("INTEL_DEVID_OVERRIDE", optarg, 1); > pci_id_overridden = 1; > break; > +case LOADABLE_PROGRAM_BINARY_OPTION: > +enable_prog_bin = 1; > +break; > default: > fprintf(stderr, "Unknown option: %x\n", opt); > print_usage(argv[0]); > @@ -858,18 +867,18 @@ main(int argc, char **argv) > } > } else if (type == TYPE_CORE || type == TYPE_COMPAT || type == > TYPE_ES) { > GLuint prog = glCreateProgram(); > +GLint param; So...putting this here means that you're not going to support generating program binaries for SSO-based programs. That seems a bit unfortunate... > > for (unsigned i = 0; i < num_shaders; i++) { > GLuint s = glCreateShader(shader[i].type); > glShaderSource(s, 1, &shader[i].text, &shader[i].length); > glCompileShader(s); > > -GLint param; > glGetShaderiv(s, GL_COMPILE_STATUS, ¶m); > if (unlikely(!param)) { > -GLchar log[4096]; > +GLchar log[MAX_LOG_LEN]; > GLsizei length; > -glGetShaderInfoLog(s, 4096, &length, log); > +glGetShaderInfoLog(s, sizeof(log), &length, log); It would be nice to make a helper function for getting the info log and printing an error, since you've now got it twice. Should probably be a separate patch (and include the MAX_LOG_LEN change). 
> > fprintf(stderr, "ERROR: %s failed to compile:\n%s\n", > current_shader_name, log); > @@ -879,6 +888,57 @@ main(int argc, char **argv) > } > > glLinkProgram(prog); > + > +glGetProgramiv(prog, GL_LINK_STATUS, ¶m); > +if (unlikely(!param)) { > + GLchar log[MAX_LOG_LEN]; > + GLsizei length; > + glGetProgramInfoLog(prog, sizeof(log), &length, log); > + > + fprintf(stderr, "ERROR: fa
Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id
On Monday, February 12, 2018 5:26:15 PM PDT Dongwon Kim wrote: > Add a new option, '--pciid' to override a pci id of the target arch > to support cross-architecture shader compilation. Not like "-p" option, > it is for accepting any GFX devices supported by the driver. > > Setting both "-p" and "--pciid" is blocked to avoid conflict. > > Signed-off-by: Dongwon Kim > --- > run.c | 44 ++-- > 1 file changed, 42 insertions(+), 2 deletions(-) Oh, another thing I forgot to mention - you might find intel_run and intel_stub.c to be useful. They allow you to emulate any Intel GPU for purposes of running shader-db, without needing to have any graphics hardware present in your system. This can be useful when building on Xeon build servers or the like... signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id
On Monday, February 12, 2018 5:26:15 PM PDT Dongwon Kim wrote: > Add a new option, '--pciid' to override a pci id of the target arch > to support cross-architecture shader compilation. Not like "-p" option, > it is for accepting any GFX devices supported by the driver. > > Setting both "-p" and "--pciid" is blocked to avoid conflict. > > Signed-off-by: Dongwon Kim > --- > run.c | 44 ++-- > 1 file changed, 42 insertions(+), 2 deletions(-) Hi Dongwon, It looks like this does the exact same thing as -p, but it accepts arbitrary numerical PCI IDs instead of a platform name. IMHO, I think we should just make -p try parsing it as a number if it doesn't match any of the names. Then either -p bdw or -p 0x1616 would work. It seems much less complicated that way. --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] vbo: Correctly handle source arrays in vbo_split_copy.
Reviewed-by: Brian Paul On Wed, Mar 14, 2018 at 2:47 PM, wrote: > From: Mathias Fröhlich > > Hi, > > Seems that the big patch did break something. > Below the fix. > > please review > > best > > Mathias > > > > > The original approach did optimize away a bit too many fields. > Reestablish the pointer into the original array and correctly feed that > one. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105471 > Fixes: 64d2a20480547d5897fd9d7b8fd306f2625138cb > mesa: Make gl_vertex_array contain pointers to first order VAO members. > Signed-off-by: Mathias Fröhlich > --- > src/mesa/vbo/vbo_split_copy.c | 8 +--- > 1 file changed, 5 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c > index 09b5b3b651..96828a073f 100644 > --- a/src/mesa/vbo/vbo_split_copy.c > +++ b/src/mesa/vbo/vbo_split_copy.c > @@ -63,6 +63,7 @@ struct copy_context { > struct { >GLuint attr; >GLuint size; > + const struct gl_vertex_array *array; >const GLubyte *src_ptr; > >struct gl_vertex_buffer_binding dstbinding; > @@ -258,7 +259,7 @@ elt(struct copy_context *copy, GLuint elt_idx) >GLuint i; > >for (i = 0; i < copy->nr_varying; i++) { > - const struct gl_vertex_array *srcarray = &copy->array[i]; > + const struct gl_vertex_array *srcarray = copy->varying[i].array; > const struct gl_vertex_buffer_binding* srcbinding > = srcarray->BufferBinding; > const GLubyte *srcptr > @@ -449,6 +450,7 @@ replay_init(struct copy_context *copy) > GLuint j = copy->nr_varying++; > > copy->varying[j].attr = i; > + copy->varying[j].array = &copy->array[i]; > copy->varying[j].size = attr_size(attrib); > copy->vertex_size += attr_size(attrib); > > @@ -520,7 +522,7 @@ replay_init(struct copy_context *copy) > /* Setup new vertex arrays to point into the output buffer: > */ > for (offset = 0, i = 0; i < copy->nr_varying; i++) { > - const struct gl_vertex_array *src = &copy->array[i]; > + const struct gl_vertex_array *src = copy->varying[i].array; >const struct
gl_array_attributes *srcattr = src->VertexAttrib; >struct gl_vertex_array *dst = ©->dstarray[i]; >struct gl_vertex_buffer_binding *dstbind = > ©->varying[i].dstbinding; > @@ -576,7 +578,7 @@ replay_finish(struct copy_context *copy) > /* Unmap VBO's */ > for (i = 0; i < copy->nr_varying; i++) { >struct gl_buffer_object *vbo = > - copy->array[i].BufferBinding->BufferObj; > + copy->varying[i].array->BufferBinding->BufferObj; >if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, > MAP_INTERNAL)) > ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL); > } > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] autotools+dri3: allow building against older xcb (v4)
On 15 March 2018 at 03:25, Daniel Stone wrote: > Hi, > > On 14 March 2018 at 13:04, Rob Clark wrote: >> I'm not sure everyone wants to be updating their dri3 in a forced >> march setting, this allows a nicer approach, esp when you want >> to build on distro that aren't brand new. > > I don't have that much of an opinion on whether the dependency should > be mandatory or not. I originally had #ifdefs and removed them when > reviewers asked me to. If people want to add them back, fine by me. > > That being said, these patches need changes, per comments below. One > thing missing entirely is making the version negotiation conditional: > when we call query_version for DRI3/Present, we need to make the > version we pass in conditional on whether or not we have new XCB. > Probably also wise to ifdef the multiplane_available variables, so > it's really obvious where any users are missing ifdefs. I was trying to minimise the ifdeffery, we could minimise the present ones with #ifndef #define #endif blocks at the top, the dri3 one I think we just have to live with. > > I'm happy to test this tomorrow and submit a new version if that's > easier for people. Please do. Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] FLAG-DAY: NIR derefs
FWIW, the way I imagined doing this was something like: 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just make everything assert if the base deref isn't a nir_deref_var. This will be a bit of a flag-day, but also very mechanical. It'll also help us catch cases where we don't handle new-style derefs later. 2. Add a pass to flatten nir_deref_type_pointer into nir_deref_type_var if possible (i.e. if there's a clear chain up to the base variable without any phi nodes or whatever). This should always be possible for GLSL, as well as SPIR-V unless KHR_variable_pointers is enabled. We'll use this to avoid too much churn in drivers, passes that haven't been updated, etc. We might also want a pass to do the opposite, for converting passes where we don't want to have codepaths for both forms at once. 3. Modify nir_lower_io to handle new-style derefs, especially for shared variables (i.e. KHR_variable_pointers for anv). We might have to modify a few other passes, too. 4. Add the required deref lowering passes to all drivers. 5. Rewrite glsl_to_nir and spirv_to_nir to emit the new-style derefs. At the very least, we should be using this to implement the shared variable bits of KHR_variable_pointers. If we add stride/offset annotations to nir_deref_instr for UBO's and SSBO's, then we might also be able to get rid of the vtn_deref stuff entirely (although I'm not sure if that should be a goal right now). At this point, we can fix things up and move everything else over to new-style derefs at our leisure. Also, it should now be pretty straightforward to add support for shared variable pointers to radv without lowering everything to offsets up-front, which is nice. Connor On Wed, Mar 14, 2018 at 2:32 PM, Jason Ekstrand wrote: > All, > > Connor and I along with several others have been discussing for a while > changing the way NIR dereferences work. 
In particular, adding a new > nir_deref_instr type where the first one in the chain takes a variable and > is followed by a series of instructions which take another deref instruction > and do an array or structure dereference on it. > > Much of the motivation for this is some of the upcoming SPIR-V stuff where > we have more real pointers and deref chains don't really work anymore. It > will also allow for things such as CSE of common derefs which could make > analysis easier. This is similar to what LLVM does and it's working very > well for them. > > The reason for this e-mail is that this is going to be a flag-day change. > We've been talking about it for a while but this is going to be a major and > fairly painful change in the short term so no one has actually done it. > It's time we finally just suck it up and make it happen. While we will try > to make the change as incrementally and reviewably as possible but there is > a real limit as to what is possible here. My plan is to start cracking away > at this on Monday and hopefully have something working for i965/anv by the > end of the week or maybe some time the week after. If anyone has something > to say in opposition, please speak up now and not after I've spent a week > straight frantically hacking on NIR. > > I would like everyone to be respectful of the fact that this will be a major > change and very painful to rebase. If you've got outstanding NIR, GLSL, or > SPIR-V work that is likely to conflict with this, please try to land it > before Monday so that we can avoid rebase conflicts. If you have interest > in reviewing this, please try to be responsive so that we can get it > reviewed and landed before it becomes too painful. I'll try to send out > some preview patches as I go so that the data structures themselves can get > some review before the rest of the changes have been made. > > I'm also asking for help from Rob, Bas, and Eric if there are changes needed > in any of their drivers. 
I suspect the impact on back-end drivers will be > low because most of them don't use derefs directly, but it would be good if > people were on hand to help catch bugs if nothing else. > > Thanks, > > --Jason Ekstrand > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] vbo: Correctly handle source arrays in vbo_split_copy.
From: Mathias Fröhlich Hi, Seems that the big patch did break something. Below the fix. please review best Mathias The original approach did optimize away a bit too many fields. Re-establish the pointer into the original array and correctly feed that one. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105471 Fixes: 64d2a20480547d5897fd9d7b8fd306f2625138cb mesa: Make gl_vertex_array contain pointers to first order VAO members. Signed-off-by: Mathias Fröhlich --- src/mesa/vbo/vbo_split_copy.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c index 09b5b3b651..96828a073f 100644 --- a/src/mesa/vbo/vbo_split_copy.c +++ b/src/mesa/vbo/vbo_split_copy.c @@ -63,6 +63,7 @@ struct copy_context { struct { GLuint attr; GLuint size; + const struct gl_vertex_array *array; const GLubyte *src_ptr; struct gl_vertex_buffer_binding dstbinding; @@ -258,7 +259,7 @@ elt(struct copy_context *copy, GLuint elt_idx) GLuint i; for (i = 0; i < copy->nr_varying; i++) { - const struct gl_vertex_array *srcarray = &copy->array[i]; + const struct gl_vertex_array *srcarray = copy->varying[i].array; const struct gl_vertex_buffer_binding* srcbinding = srcarray->BufferBinding; const GLubyte *srcptr @@ -449,6 +450,7 @@ replay_init(struct copy_context *copy) GLuint j = copy->nr_varying++; copy->varying[j].attr = i; + copy->varying[j].array = &copy->array[i]; copy->varying[j].size = attr_size(attrib); copy->vertex_size += attr_size(attrib); @@ -520,7 +522,7 @@ replay_init(struct copy_context *copy) /* Setup new vertex arrays to point into the output buffer: */ for (offset = 0, i = 0; i < copy->nr_varying; i++) { - const struct gl_vertex_array *src = &copy->array[i]; + const struct gl_vertex_array *src = copy->varying[i].array; const struct gl_array_attributes *srcattr = src->VertexAttrib; struct gl_vertex_array *dst = &copy->dstarray[i]; struct gl_vertex_buffer_binding *dstbind = &copy->varying[i].dstbinding; @@ -576,7 +578,7 @@
replay_finish(struct copy_context *copy) /* Unmap VBO's */ for (i = 0; i < copy->nr_varying; i++) { struct gl_buffer_object *vbo = - copy->array[i].BufferBinding->BufferObj; + copy->varying[i].array->BufferBinding->BufferObj; if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL)) ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL); } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105506] Vulkan MSAA is broken on SI
https://bugs.freedesktop.org/show_bug.cgi?id=105506 --- Comment #5 from Jason Ekstrand --- I recommend you file a bug against the validation layers as at least the first of the two comments Bas made should be invalid. The second is valid but does not do what you want. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] FLAG-DAY: NIR derefs
All, Connor and I along with several others have been discussing for a while changing the way NIR dereferences work. In particular, adding a new nir_deref_instr type where the first one in the chain takes a variable and is followed by a series of instructions which take another deref instruction and do an array or structure dereference on it. Much of the motivation for this is some of the upcoming SPIR-V stuff where we have more real pointers and deref chains don't really work anymore. It will also allow for things such as CSE of common derefs which could make analysis easier. This is similar to what LLVM does and it's working very well for them. The reason for this e-mail is that this is going to be a flag-day change. We've been talking about it for a while but this is going to be a major and fairly painful change in the short term so no one has actually done it. It's time we finally just suck it up and make it happen. We will try to make the change as incrementally and reviewably as possible, but there is a real limit as to what is possible here. My plan is to start cracking away at this on Monday and hopefully have something working for i965/anv by the end of the week or maybe some time the week after. If anyone has something to say in opposition, please speak up now and not after I've spent a week straight frantically hacking on NIR. I would like everyone to be respectful of the fact that this will be a major change and very painful to rebase. If you've got outstanding NIR, GLSL, or SPIR-V work that is likely to conflict with this, please try to land it before Monday so that we can avoid rebase conflicts. If you have interest in reviewing this, please try to be responsive so that we can get it reviewed and landed before it becomes too painful. I'll try to send out some preview patches as I go so that the data structures themselves can get some review before the rest of the changes have been made.
I'm also asking for help from Rob, Bas, and Eric if there are changes needed in any of their drivers. I suspect the impact on back-end drivers will be low because most of them don't use derefs directly, but it would be good if people were on hand to help catch bugs if nothing else. Thanks, --Jason Ekstrand ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/4] st/nine: Fix bad tracking of bound vs textures
For the series Tested-by: Dieter Nützel on Polaris 20 (RX580) with several Wine-staging (Nine) apps, but I've to note, that I do NOT have Guild Wars 2 and Torchlight... Dieter Am 13.03.2018 23:09, schrieb Axel Davy: An incorrect formula was used to compute bound_samplers_mask_vs. Since s is above always 8 for vs and the variable is encoded on 8 bits, it was always 0. This resulted in committing the samplers every call when there was at least one texture read in the vs shader. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/nine_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 26b2dea3bd..c81a05a952 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -980,7 +980,7 @@ update_textures_and_samplers(struct NineDevice9 *device) context->changed.sampler[s] = ~0; } -context->bound_samplers_mask_vs |= (1 << s); +context->bound_samplers_mask_vs |= (1 << i); } cso_set_sampler_views(context->cso, PIPE_SHADER_VERTEX, num_textures, view); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105506] Vulkan MSAA is broken on SI
https://bugs.freedesktop.org/show_bug.cgi?id=105506 Bas Nieuwenhuizen changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |NOTOURBUG --- Comment #4 from Bas Nieuwenhuizen --- As discussed on #dri-devel, the example application contains several layout issues such as 1) using UNDEFINED as the source layout for the vkCmdResolveImage. 2) using UNDEFINED as the initial layout for the second renderpass when you want to preserve contents. These caused issues with MSAA on a Vega. I am not completely certain that fixing these will fix SI. Please fix these and then take a look in renderdoc on where it is going wrong. If you still suspect the driver after that we can take another look. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC] Mesa release improvements - Feature and Stable releases
On Wed, 2018-03-14 at 16:02 +, Emil Velikov wrote: [...] > > Just double-checking: > I would suspect you're not suggesting removing the existing email/poke scheme? Partially. The "announce" mail for the pre-branching period will still happen, pointing to the "Metabug" in which to add the WIP features that developers intend to land before the deadline. If some of the developers just reply by mail/IRC/you-name-it, then it will be the release manager task to add the blocking bugs with the WIP features, as a way of documenting them. > Providing another means to devs to track/handle things is good IMHO. > Whether developers will like it is up-to them. Everyone, your input is > appreciated! > > > I'm slightly worried that it might cause extra confusion. > Some crude examples follow: > - I don't use bugzilla/etc to track my feature work - most teams I don't think much interaction/documentation is needed. Just mention the WIP feature and update its status eventually ... and only for the ones developer X wants to have at branchpoint Y before that happens. The rest of the work of developer X doesn't need to be in Bugzilla. > - Do I open another bug, or list my feature in the metabug - seeming > an ongoing theme with metabugs I think it should be a new blocking bug but I'm open to just document it in the Metabug. > - Do I add the bug, reply to the email or both Preferably, just add the bug. Once the bug is created and all the parties are in Cc for the bug, I understand there is no need for any other way of communication. I'm still open to reconsidering, though. -- Br, Andres ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc
Hello Thomas, is this useful even after '[Mesa-dev] [PATCH 0/2] V2: Use hash table cloning in copy propagation' landed? I'm running both together with Dave's '[Mesa-dev] [PATCH] radv/winsys: replace bo list searchs with a hash table.' patch. Dieter Am 24.01.2018 08:33, schrieb Thomas Helland: 2018-01-21 23:58 GMT+01:00 Eric Anholt : Thomas Helland writes: Also, allocate worklist_elem in groups of 20, to reduce the burden of allocation. Do not use rzalloc, as there is no need. This lets us drop the number of calls to ralloc from approximately 10% of all calls to ralloc(130 000 calls), down to a mere 2000 calls to ralloc_array_size. This cuts the runtime of shader-db by 1%, while at the same time reducing the number of stalled cycles, executed cycles, and executed instructions by about 1 % as reported by perf. I did a five-run benchmark pre and post and got a statistical variance less than 0.1% pre and post. This was with i965's ir validation polluting the benchmark, so the numbers are even better in release builds. Performance change as found with perf-diff: 4.74% -0.23% libc-2.26.so[.] _int_malloc 1.88% -0.21% libc-2.26.so[.] malloc 2.27% +0.16% libmesa_dri_drivers.so [.] match_value.part.7 2.95% -0.12% libc-2.26.so[.] _int_free +0.11% libmesa_dri_drivers.so [.] worklist_push 1.22% -0.08% libc-2.26.so[.] malloc_consolidate 0.16% -0.06% libmesa_dri_drivers.so [.] mark_live_cb 1.21% +0.06% libmesa_dri_drivers.so [.] match_expression.part.6 0.75% -0.05% libc-2.26.so[.] cfree@GLIBC_2.2.5 0.50% -0.05% libmesa_dri_drivers.so [.] ralloc_size 0.57% +0.04% libmesa_dri_drivers.so [.] nir_replace_instr 1.29% -0.04% libmesa_dri_drivers.so [.] unsafe_free I'm curious, since a NIR instruction worklist seems like a generally useful thing to have: Could nir_worklist.c keep the implementation of this?
Also, I wonder if it wouldn't be even better to have a u_dynarray of instructions in the worklist, with push/pop on the end of the array, and a struct set tracking the instructions in the array to avoid double-adding. I actually don't know if that would be better or not, so I'd be happy with the worklist management just moved to nir_worklist.c. I'll look into this to see what I can do. nir_worklist.c at this time has only a block worklist. This numbers all the blocks, uses a bitset for checking if the item is present, and uses an array with an index pointing to the start of the queue of blocks in the buffer. The same scheme could be easily used for ssa-defs, as these are also numbered. I actually did this for the VRP pass I wrote years ago. However, for instructions we do not have a way of numbering them, so a different scheme would have to be used. A dynarray + set type of thing, as you're suggesting, might get us where we want. I'll see what I can come up with. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] st/mesa: Factorize duplicate code in st_BlitFramebuffer()
Pushed! Thanks. Marek On Tue, Feb 20, 2018 at 8:30 AM, Guillaume Charifi wrote: > --- > src/mesa/state_tracker/st_cb_blit.c | 76 > + > 1 file changed, 26 insertions(+), 50 deletions(-) > > diff --git a/src/mesa/state_tracker/st_cb_blit.c > b/src/mesa/state_tracker/st_cb_blit.c > index 8aa849b3fc..564ad5 100644 > --- a/src/mesa/state_tracker/st_cb_blit.c > +++ b/src/mesa/state_tracker/st_cb_blit.c > @@ -174,53 +174,29 @@ st_BlitFramebuffer(struct gl_context *ctx, > if (mask & GL_COLOR_BUFFER_BIT) { >struct gl_renderbuffer_attachment *srcAtt = > &readFB->Attachment[readFB->_ColorReadBufferIndex]; > + GLuint i; > >blit.mask = PIPE_MASK_RGBA; > >if (srcAtt->Type == GL_TEXTURE) { > struct st_texture_object *srcObj = > st_texture_object(srcAtt->Texture); > - GLuint i; > > if (!srcObj || !srcObj->pt) { > return; > } > > - for (i = 0; i < drawFB->_NumColorDrawBuffers; i++) { > -struct st_renderbuffer *dstRb = > - st_renderbuffer(drawFB->_ColorDrawBuffers[i]); > + blit.src.resource = srcObj->pt; > + blit.src.level = srcAtt->TextureLevel; > + blit.src.box.z = srcAtt->Zoffset + srcAtt->CubeMapFace; > + blit.src.format = srcObj->pt->format; > > -if (dstRb) { > - struct pipe_surface *dstSurf; > - > - st_update_renderbuffer_surface(st, dstRb); > - > - dstSurf = dstRb->surface; > - > - if (dstSurf) { > - blit.dst.resource = dstSurf->texture; > - blit.dst.level = dstSurf->u.tex.level; > - blit.dst.box.z = dstSurf->u.tex.first_layer; > - blit.dst.format = dstSurf->format; > - > - blit.src.resource = srcObj->pt; > - blit.src.level = srcAtt->TextureLevel; > - blit.src.box.z = srcAtt->Zoffset + srcAtt->CubeMapFace; > - blit.src.format = srcObj->pt->format; > - > - if (!ctx->Color.sRGBEnabled) > - blit.src.format = util_format_linear(blit.src.format); > - > - st->pipe->blit(st->pipe, &blit); > - dstRb->defined = true; /* front buffer tracking */ > - } > -} > - } > + if (!ctx->Color.sRGBEnabled) > +blit.src.format = util_format_linear(blit.src.format); >} >else { > struct 
st_renderbuffer *srcRb = > st_renderbuffer(readFB->_ColorReadBuffer); > struct pipe_surface *srcSurf; > - GLuint i; > > if (!srcRb) > return; > @@ -232,31 +208,31 @@ st_BlitFramebuffer(struct gl_context *ctx, > > srcSurf = srcRb->surface; > > - for (i = 0; i < drawFB->_NumColorDrawBuffers; i++) { > -struct st_renderbuffer *dstRb = > - st_renderbuffer(drawFB->_ColorDrawBuffers[i]); > + blit.src.resource = srcSurf->texture; > + blit.src.level = srcSurf->u.tex.level; > + blit.src.box.z = srcSurf->u.tex.first_layer; > + blit.src.format = srcSurf->format; > + } > > -if (dstRb) { > - struct pipe_surface *dstSurf; > + for (i = 0; i < drawFB->_NumColorDrawBuffers; i++) { > + struct st_renderbuffer *dstRb = > +st_renderbuffer(drawFB->_ColorDrawBuffers[i]); > > - st_update_renderbuffer_surface(st, dstRb); > + if (dstRb) { > +struct pipe_surface *dstSurf; > > - dstSurf = dstRb->surface; > +st_update_renderbuffer_surface(st, dstRb); > > - if (dstSurf) { > - blit.dst.resource = dstSurf->texture; > - blit.dst.level = dstSurf->u.tex.level; > - blit.dst.box.z = dstSurf->u.tex.first_layer; > - blit.dst.format = dstSurf->format; > +dstSurf = dstRb->surface; > > - blit.src.resource = srcSurf->texture; > - blit.src.level = srcSurf->u.tex.level; > - blit.src.box.z = srcSurf->u.tex.first_layer; > - blit.src.format = srcSurf->format; > +if (dstSurf) { > + blit.dst.resource = dstSurf->texture; > + blit.dst.level = dstSurf->u.tex.level; > + blit.dst.box.z = dstSurf->u.tex.first_layer; > + blit.dst.format = dstSurf->format; > > - st->pipe->blit(st->pipe, &blit); > - dstRb->defined = true; /* front buffer tracking */ > - } > + st->pipe->blit(st->pipe, &blit); > + dstRb->defined = true; /* front buffer tracking */ > } > } >} > -- > 2.14.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.fr
Re: [Mesa-dev] [PATCH 1/3] wayland-drm: Expose server-side xbgr2101010 and abgr2101010 formats.
On Tue, Mar 13, 2018 at 5:30 AM, Daniel Stone wrote: > Hi Mario, > > On 12 March 2018 at 20:45, Mario Kleiner wrote: >> This way the wayland server can signal support for these formats >> to wayland EGL clients. This is currently used by nouveau for 10 >> bpc support. >> >> Tested with glmark2-wayland and glmark2-es2-wayland under weston >> to now expose 10 bpc EGL configs under nouveau. > > Do we need a way to ensure that the backend driver does actually > support BGR for texturing? AFAIK, if a client happens to select a BGR > config on other drivers now - using a compositor which does not > implement wl_drm - this will break for them. I think in practice, every hw driver can support both for texturing if it can support one, since swizzles are always possible (due to ARB_texture_swizzle). In practice at least nouveau prior to Mario's patches only supported it one way. I just checked r600, radeonsi, i965 and freedreno, and they appear to support both for texturing. I think that covers the majority of the likely 10bpc users. -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] egl/dri2: move wayland header inclusion where applicable
Emil Velikov wrote: From: Emil Velikov Instead of indirectly pulling the wayland headers everywhere, use forward declarations and #include only as needed. Should effectively fix build errors like the following: make[5]: Entering directory '/.../src/gallium/state_trackers/omx/tizonia' CC h264dprc.lo In file included from h264dprc.c:45:0: .../src/egl/drivers/dri2/egl_dri2.h:47:10: fatal error: wayland/wayland-egl/wayland-egl-backend.h: No such file or directory #include "wayland/wayland-egl/wayland-egl-backend.h" Cc: Andy Furniss Cc: Dylan Baker Signed-off-by: Emil Velikov --- Dylan had epiphany a minute after I hit Send. Sorry about that. Gents this should remove the need of any the following patches. Please you give them a try, manually reverting the meson fix. Autotools build is good for me with this patch. Thanks! https://patchwork.freedesktop.org/patch/208770/ https://patchwork.freedesktop.org/patch/208306/ https://patchwork.freedesktop.org/patch/208322/ --- src/egl/drivers/dri2/egl_dri2.c | 1 + src/egl/drivers/dri2/egl_dri2.h | 12 +--- src/egl/drivers/dri2/platform_wayland.c | 2 ++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 864f7eb0c68..535806e4bfe 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -48,6 +48,7 @@ #include #ifdef HAVE_WAYLAND_PLATFORM +#include #include "wayland-drm.h" #include "wayland-drm-client-protocol.h" #include "linux-dmabuf-unstable-v1-client-protocol.h" diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index bd637f73c9d..adabc527f85 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -43,9 +43,15 @@ #endif #ifdef HAVE_WAYLAND_PLATFORM -#include -#include "wayland/wayland-egl/wayland-egl-backend.h" -/* forward declarations of protocol elements */ +/* forward declarations to avoid pulling wayland headers everywhere */ +struct wl_egl_window; +struct 
wl_event_queue; +struct wl_callback; +struct wl_display; +struct wl_drm; +struct wl_registry; +struct wl_shm; +struct wl_surface; struct zwp_linux_dmabuf_v1; #endif diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 877f7933b9a..94f7defa657 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -49,6 +49,8 @@ #include "wayland-drm-client-protocol.h" #include "linux-dmabuf-unstable-v1-client-protocol.h" +#include "wayland/wayland-egl/wayland-egl-backend.h" + #ifndef DRM_FORMAT_MOD_INVALID #define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1) #endif ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/5] i965/miptree: Map with movntdqa for linear buffers only
On Tue, Jan 09, 2018 at 11:17:01PM -0800, Scott D Phillips wrote: > Removes a place where gtt mapping is used. > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > This patch is Reviewed-by: Nanley Chery > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index e4a3f163d2..fa4ae06399 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -3707,7 +3707,8 @@ intel_miptree_map(struct brw_context *brw, > #if defined(USE_SSE41) > } else if (!(mode & GL_MAP_WRITE_BIT) && >!mt->compressed && cpu_has_sse4_1 && > - (mt->surf.row_pitch % 16 == 0)) { > + (mt->surf.row_pitch % 16 == 0) && > + (mt->surf.tiling == ISL_TILING_LINEAR)) { >intel_miptree_map_movntdqa(brw, mt, map, level, slice); > #endif > } else if (mt->surf.tiling != ISL_TILING_LINEAR) { > @@ -3752,6 +3753,7 @@ intel_miptree_unmap(struct brw_context *brw, > } else if (!(map->mode & GL_MAP_WRITE_BIT) && >!mt->compressed && cpu_has_sse4_1 && >(mt->surf.row_pitch % 16 == 0) && > + (mt->surf.tiling == ISL_TILING_LINEAR) && >map->buffer) { >intel_miptree_unmap_movntdqa(brw, mt, map, level, slice); > #endif > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105507] Crash when destroying a newly resized EGLsurface with wayland egl (dri2)
https://bugs.freedesktop.org/show_bug.cgi?id=105507 --- Comment #2 from Daniel Stone --- You could place any orphaned wl_buffers on an per-surface list instead, and spin at destruction until that emptied. I won't have the time to look into it myself for a while though. Johan - which test hits this? -- You are receiving this mail because: You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105507] Crash when destroying a newly resized EGLsurface with wayland egl (dri2)
https://bugs.freedesktop.org/show_bug.cgi?id=105507 Emil Velikov changed: What|Removed |Added CC||dan...@fooishbar.org --- Comment #1 from Emil Velikov --- Daniel, any suggestions? -- You are receiving this mail because: You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] egl/dri2: move wayland header inclusion where applicable
From: Emil Velikov Instead of indirectly pulling the wayland headers everywhere, use forward declarations and #include only as needed. Should effectively fix build errors like the following: make[5]: Entering directory '/.../src/gallium/state_trackers/omx/tizonia' CC h264dprc.lo In file included from h264dprc.c:45:0: .../src/egl/drivers/dri2/egl_dri2.h:47:10: fatal error: wayland/wayland-egl/wayland-egl-backend.h: No such file or directory #include "wayland/wayland-egl/wayland-egl-backend.h" Cc: Andy Furniss Cc: Dylan Baker Signed-off-by: Emil Velikov --- Dylan had epiphany a minute after I hit Send. Sorry about that. Gents this should remove the need of any the following patches. Please you give them a try, manually reverting the meson fix. Thanks! https://patchwork.freedesktop.org/patch/208770/ https://patchwork.freedesktop.org/patch/208306/ https://patchwork.freedesktop.org/patch/208322/ --- src/egl/drivers/dri2/egl_dri2.c | 1 + src/egl/drivers/dri2/egl_dri2.h | 12 +--- src/egl/drivers/dri2/platform_wayland.c | 2 ++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 864f7eb0c68..535806e4bfe 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -48,6 +48,7 @@ #include #ifdef HAVE_WAYLAND_PLATFORM +#include #include "wayland-drm.h" #include "wayland-drm-client-protocol.h" #include "linux-dmabuf-unstable-v1-client-protocol.h" diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index bd637f73c9d..adabc527f85 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -43,9 +43,15 @@ #endif #ifdef HAVE_WAYLAND_PLATFORM -#include -#include "wayland/wayland-egl/wayland-egl-backend.h" -/* forward declarations of protocol elements */ +/* forward declarations to avoid pulling wayland headers everywhere */ +struct wl_egl_window; +struct wl_event_queue; +struct wl_callback; +struct wl_display; +struct 
wl_drm; +struct wl_registry; +struct wl_shm; +struct wl_surface; struct zwp_linux_dmabuf_v1; #endif diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 877f7933b9a..94f7defa657 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -49,6 +49,8 @@ #include "wayland-drm-client-protocol.h" #include "linux-dmabuf-unstable-v1-client-protocol.h" +#include "wayland/wayland-egl/wayland-egl-backend.h" + #ifndef DRM_FORMAT_MOD_INVALID #define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1) #endif -- 2.16.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/4] st/nine: Fix bad tracking of bound vs textures
On Tue, 2018-03-13 at 23:09 +0100, Axel Davy wrote: > An incorrect formula was used to compute bound_samplers_mask_vs. > Since s is above always 8 for vs and the variable is encoded on 8 > bits, > it was always 0. > This resulted in committing the samplers every call when > there was at least one texture read in the vs shader. > > Signed-off-by: Axel Davy The series is Reviewed-by: Patrick Rudolph Please also include it into 17.3 stable. > --- > src/gallium/state_trackers/nine/nine_state.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/state_trackers/nine/nine_state.c > b/src/gallium/state_trackers/nine/nine_state.c > index 26b2dea3bd..c81a05a952 100644 > --- a/src/gallium/state_trackers/nine/nine_state.c > +++ b/src/gallium/state_trackers/nine/nine_state.c > @@ -980,7 +980,7 @@ update_textures_and_samplers(struct NineDevice9 > *device) > context->changed.sampler[s] = ~0; > } > > -context->bound_samplers_mask_vs |= (1 << s); > +context->bound_samplers_mask_vs |= (1 << i); > } > > cso_set_sampler_views(context->cso, PIPE_SHADER_VERTEX, > num_textures, view); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] tizonia egl build fail
On 13 March 2018 at 19:20, Dylan Baker wrote: > Quoting Andy Furniss (2018-03-06 15:12:37) >> make[5]: Entering directory >> '/mnt/sdc1/Gits/mesa/src/gallium/state_trackers/omx/tizonia' >>CC h264dprc.lo >> In file included from h264dprc.c:45:0: >> ../../../../../src/egl/drivers/dri2/egl_dri2.h:47:10: fatal error: >> wayland/wayland-egl/wayland-egl-backend.h: No such file or directory >> #include "wayland/wayland-egl/wayland-egl-backend.h" >>^~~ >> compilation terminated. > Emil, this was the other patch. > Thanks Dylan! Please include the error in the commit message of your (better) patch. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] autotools+dri3: allow building against older xcb (v4)
Hi, On 14 March 2018 at 13:04, Rob Clark wrote: > I'm not sure everyone wants to be updating their dri3 in a forced > march setting, this allows a nicer approach, esp when you want > to build on distro that aren't brand new. I don't have that much of an opinion on whether the dependency should be mandatory or not. I originally had #ifdefs and removed them when reviewers asked me to. If people want to add them back, fine by me. That being said, these patches need changes, per comments below. One thing missing entirely is making the version negotiation conditional: when we call query_version for DRI3/Present, we need to make the version we pass in conditional on whether or not we have new XCB. Probably also wise to ifdef the multiplane_available variables, so it's really obvious where any users are missing ifdefs. I'm happy to test this tomorrow and submit a new version if that's easier for people. > @@ -327,6 +327,7 @@ dri3_create_image_khr_pixmap_from_buffers(_EGLDisplay > *disp, _EGLContext *ctx, >EGLClientBuffer buffer, >const EGLint *attr_list) > { > +#ifdef HAVE_DRI3_MODIFIERS > struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); > struct dri2_egl_image *dri2_img; > xcb_dri3_buffers_from_pixmap_cookie_t bp_cookie; > @@ -376,6 +377,9 @@ dri3_create_image_khr_pixmap_from_buffers(_EGLDisplay > *disp, _EGLContext *ctx, > } > > return &dri2_img->base; > +#else > + return NULL; > +#endif > } Just ifdef out the entire function, don't return NULL. 
> @@ -1272,6 +1276,7 @@ dri3_alloc_render_buffer(struct loader_dri3_drawable > *draw, unsigned int format, > pixmap = xcb_generate_id(draw->conn); > if (draw->multiplanes_available && > buffer->modifier != DRM_FORMAT_MOD_INVALID) { > +#ifdef HAVE_DRI3_MODIFIERS >xcb_dri3_pixmap_from_buffers(draw->conn, > pixmap, > draw->drawable, > @@ -1284,6 +1289,7 @@ dri3_alloc_render_buffer(struct loader_dri3_drawable > *draw, unsigned int format, > depth, buffer->cpp * 8, > buffer->modifier, > buffer_fds); > +#endif > } else { >xcb_dri3_pixmap_from_buffer(draw->conn, >pixmap, This ifdef needs to wrap the branch, so that the single-buffer xcb_dri3_pixmap_from_buffer() always gets called if we built against old XCB, else new-server + old-XCB-Mesa never allocates a render buffer for X11 surfaces. > @@ -1567,7 +1575,7 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, > unsigned int format, >(sync_fence = xcb_generate_id(draw->conn)), >false, >fence_fd); > - > +#ifdef HAVE_DRI3_MODIFIERS > if (draw->multiplanes_available && > draw->ext->image->base.version >= 15 && > draw->ext->image->createImageFromDmaBufs2) { > @@ -1586,7 +1594,9 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, > unsigned int format, >width = bps_reply->width; >height = bps_reply->height; >free(bps_reply); > - } else { > + } else > +#endif > + { >xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie; >xcb_dri3_buffer_from_pixmap_reply_t *bp_reply; Jason complained about control flow being intermingled with #ifdefs like this. I don't have any suggestions as to how to do it better though, which is why I did it like this in the first place. Cheers, Daniel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 5/8] intel: devinfo: add helper functions to fill fusing masks values
There are a couple of ways we can get the fusing information from the kernel : - Through DRM_I915_GETPARAM with the SLICE_MASK/SUBSLICE_MASK parameters - Through the new DRM_IOCTL_I915_QUERY by requesting the DRM_I915_QUERY_TOPOLOGY_INFO The second method is more accurate and also gives us the EUs fusing masks. It's also a requirement for CNL as this platform has asymetric subslices and the first method SUBSLICE_MASK value is assumed uniform across slices. Signed-off-by: Lionel Landwerlin --- src/intel/dev/gen_device_info.c | 129 src/intel/dev/gen_device_info.h | 11 2 files changed, 140 insertions(+) diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c index c1bdc997f2c..a8c9f7738b2 100644 --- a/src/intel/dev/gen_device_info.c +++ b/src/intel/dev/gen_device_info.c @@ -28,8 +28,11 @@ #include #include "gen_device_info.h" #include "compiler/shader_enums.h" +#include "util/bitscan.h" #include "util/macros.h" +#include + /** * Get the PCI ID for the device name. * @@ -913,6 +916,132 @@ fill_masks(struct gen_device_info *devinfo) } } +static void +reset_masks(struct gen_device_info *devinfo) +{ + devinfo->subslice_slice_stride = + devinfo->eu_subslice_stride = + devinfo->eu_slice_stride = 0; + + devinfo->num_slices = + devinfo->num_eu_per_subslice = 0; + memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices)); + + memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks)); + memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks)); + memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks)); +} + +void +gen_device_info_update_from_masks(struct gen_device_info *devinfo, + uint32_t slice_mask, + uint32_t subslice_mask, + uint32_t n_eus) +{ + reset_masks(devinfo); + + assert((slice_mask & 0xff) == slice_mask); + + devinfo->slice_masks = slice_mask; + devinfo->num_slices = __builtin_popcount(devinfo->slice_masks); + + uint32_t max_slices = util_last_bit(slice_mask); + uint32_t max_subslices = util_last_bit(subslice_mask); + 
devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8); + uint32_t n_subslices = 0; + for (int s = 0; s < util_last_bit(slice_mask); s++) { + if ((slice_mask & (1UL << s)) == 0) + continue; + + for (int b = 0; b < devinfo->subslice_slice_stride; b++) { + int subslice_offset = s * devinfo->subslice_slice_stride + b; + + devinfo->subslice_masks[subslice_offset] = +(subslice_mask >> (b * 8)) & 0xff; + devinfo->num_subslices[s] += +__builtin_popcount(devinfo->subslice_masks[subslice_offset]); + } + + n_subslices += devinfo->num_subslices[s]; + } + + /* We expect the total number of EUs to be uniformly distributed throughout +* the subslices. +*/ + assert((n_eus % n_subslices) == 0); + devinfo->num_eu_per_subslice = n_eus / n_subslices; + + devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8); + devinfo->eu_slice_stride = devinfo->eu_subslice_stride * max_subslices; + + for (int s = 0; s < max_slices; s++) { + if ((slice_mask & (1UL << s)) == 0) + continue; + + for (int ss = 0; ss < max_subslices; ss++) { + if ((subslice_mask & (1UL << ss)) == 0) +continue; + + for (int b = 0; b < devinfo->eu_subslice_stride; b++) { +int eus_offset = s * devinfo->eu_slice_stride + + ss * devinfo->eu_subslice_stride + b; + +devinfo->eu_masks[eus_offset] = + (((1UL << devinfo->num_eu_per_subslice) - 1) >> (b * 8)) & 0xff; + } + } + } +} + +void +gen_device_info_update_from_topology(struct gen_device_info *devinfo, + const struct drm_i915_query_topology_info *topology) +{ + reset_masks(devinfo); + + devinfo->subslice_slice_stride = topology->subslice_stride; + + devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8); + devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride; + + assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8)); + memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8)); + devinfo->num_slices = __builtin_popcount(devinfo->slice_masks); + + 
uint32_t subslice_mask_len = + topology->max_slices * topology->subslice_stride; + assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len); + memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset], + subslice_mask_len); + + uint32_t n_subslices = 0; + for (int s = 0; s < topology->max_slices; s++) { + if ((devinfo->slice_masks & (1UL << s)) == 0) + continue; + + for (int b = 0; b < devinfo->subslice_slice_stride; b++) { + de
[Mesa-dev] [PATCH v2 4/8] intel: devinfo: meson: include drm uapi
Already available with the autotools build. Signed-off-by: Lionel Landwerlin --- src/intel/dev/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/dev/meson.build b/src/intel/dev/meson.build index 3346fe60c07..9369fd3c0da 100644 --- a/src/intel/dev/meson.build +++ b/src/intel/dev/meson.build @@ -28,6 +28,6 @@ files_libintel_dev = files( libintel_dev = static_library( ['intel_dev'], files_libintel_dev, - include_directories : [inc_common, inc_intel], + include_directories : [inc_common, inc_intel, inc_drm_uapi], c_args : [c_vis_args, no_override_init_args], ) -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 3/8] drm-uapi: bump headers
This commit is meant to be replaced with a proper bump from drm-next. --- include/drm-uapi/i915_drm.h | 146 +++- 1 file changed, 145 insertions(+), 1 deletion(-) diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h index 7f28eea4035..9dfebbbe117 100644 --- a/include/drm-uapi/i915_drm.h +++ b/include/drm-uapi/i915_drm.h @@ -102,6 +102,46 @@ enum drm_i915_gem_engine_class { I915_ENGINE_CLASS_INVALID = -1 }; +/** + * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 + * + */ + +enum drm_i915_pmu_engine_sample { + I915_SAMPLE_BUSY = 0, + I915_SAMPLE_WAIT = 1, + I915_SAMPLE_SEMA = 2 +}; + +#define I915_PMU_SAMPLE_BITS (4) +#define I915_PMU_SAMPLE_MASK (0xf) +#define I915_PMU_SAMPLE_INSTANCE_BITS (8) +#define I915_PMU_CLASS_SHIFT \ + (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS) + +#define __I915_PMU_ENGINE(class, instance, sample) \ + ((class) << I915_PMU_CLASS_SHIFT | \ + (instance) << I915_PMU_SAMPLE_BITS | \ + (sample)) + +#define I915_PMU_ENGINE_BUSY(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY) + +#define I915_PMU_ENGINE_WAIT(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT) + +#define I915_PMU_ENGINE_SEMA(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) + +#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) + +#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) +#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) +#define I915_PMU_INTERRUPTS__I915_PMU_OTHER(2) +#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) + +#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY + /* Each region is a minimum of 16k, and there are at most 255 of them. 
*/ #define I915_NR_TEX_REGIONS 255/* table size 2k - maximum due to use @@ -278,6 +318,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_PERF_OPEN 0x36 #define DRM_I915_PERF_ADD_CONFIG 0x37 #define DRM_I915_PERF_REMOVE_CONFIG0x38 +#define DRM_I915_QUERY 0x39 #define DRM_IOCTL_I915_INITDRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -335,6 +376,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) +#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -1318,7 +1360,9 @@ struct drm_intel_overlay_attrs { * active on a given plane. */ -#define I915_SET_COLORKEY_NONE (1<<0) /* disable color key matching */ +#define I915_SET_COLORKEY_NONE (1<<0) /* Deprecated. Instead set + * flags==0 to disable colorkeying. + */ #define I915_SET_COLORKEY_DESTINATION (1<<1) #define I915_SET_COLORKEY_SOURCE (1<<2) struct drm_intel_sprite_colorkey { @@ -1573,6 +1617,106 @@ struct drm_i915_perf_oa_config { __u64 flex_regs_ptr; }; +struct drm_i915_query_item { + __u64 query_id; +#define DRM_I915_QUERY_TOPOLOGY_INFO1 + + /* +* When set to zero by userspace, this is filled with the size of the +* data to be written at the data_ptr pointer. The kernel set this +* value to a negative value to signal an error on a particular query +* item. +*/ + __s32 length; + + /* +* Unused for now. 
+*/ + __u32 flags; + + /* +* Data will be written at the location pointed by data_ptr when the +* value of length matches the length of the data to be written by the +* kernel. +*/ + __u64 data_ptr; +}; + +struct drm_i915_query { + __u32 num_items; + + /* +* Unused for now. +*/ + __u32 flags; + + /* +* This point to an array of num_items drm_i915_query_item structures. +*/ + __u64 items_ptr; +}; + +/* + * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO : + * + * data: contains the 3 pieces of information : + * + * - the slice mask with one bit per slice telling whether a slice is + * available. The availability of slice X can be queried with the following + * formula : + * + * (data[X / 8] >> (X % 8)) & 1 + * + * - the subslice mask for each slice with one bit per subslice tell
[Mesa-dev] [PATCH v2 1/8] intel: devinfo: store number of EUs per subslice
This will be reused to store values reported by the kernel. The main use case will be for use as the input values of the metric sets equations for the INTEL_performance_queries extension. By storing this information in the gen_device_info we make this non GL specific so this can be reused by Vulkan if we ever have an equivalent extension. Signed-off-by: Lionel Landwerlin --- src/intel/dev/gen_device_info.c | 35 +-- src/intel/dev/gen_device_info.h | 5 + 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c index 1773009d33c..26c2651f0ff 100644 --- a/src/intel/dev/gen_device_info.c +++ b/src/intel/dev/gen_device_info.c @@ -92,6 +92,7 @@ static const struct gen_device_info gen_device_info_i965 = { .has_negative_rhw_bug = true, .num_slices = 1, .num_subslices = { 1, }, + .num_eu_per_subslice = 8, .num_thread_per_eu = 4, .max_vs_threads = 16, .max_gs_threads = 2, @@ -110,6 +111,7 @@ static const struct gen_device_info gen_device_info_g4x = { .is_g4x = true, .num_slices = 1, .num_subslices = { 1, }, + .num_eu_per_subslice = 10, .num_thread_per_eu = 5, .max_vs_threads = 32, .max_gs_threads = 2, @@ -127,6 +129,7 @@ static const struct gen_device_info gen_device_info_ilk = { .has_surface_tile_offset = true, .num_slices = 1, .num_subslices = { 1, }, + .num_eu_per_subslice = 12, .num_thread_per_eu = 6, .max_vs_threads = 72, .max_gs_threads = 32, @@ -147,6 +150,7 @@ static const struct gen_device_info gen_device_info_snb_gt1 = { .needs_unlit_centroid_workaround = true, .num_slices = 1, .num_subslices = { 1, }, + .num_eu_per_subslice = 6, .num_thread_per_eu = 6, /* Not confirmed */ .max_vs_threads = 24, .max_gs_threads = 21, /* conservative; 24 if rendering disabled. 
*/ @@ -174,6 +178,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 = { .needs_unlit_centroid_workaround = true, .num_slices = 1, .num_subslices = { 1, }, + .num_eu_per_subslice = 12, .num_thread_per_eu = 6, /* Not confirmed */ .max_vs_threads = 60, .max_gs_threads = 60, @@ -205,6 +210,7 @@ static const struct gen_device_info gen_device_info_ivb_gt1 = { GEN7_FEATURES, .is_ivybridge = true, .gt = 1, .num_slices = 1, .num_subslices = { 1, }, + .num_eu_per_subslice = 6, .num_thread_per_eu = 6, .l3_banks = 2, .max_vs_threads = 36, @@ -232,6 +238,7 @@ static const struct gen_device_info gen_device_info_ivb_gt2 = { GEN7_FEATURES, .is_ivybridge = true, .gt = 2, .num_slices = 1, .num_subslices = { 1, }, + .num_eu_per_subslice = 12, .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of * @max_wm_threads ... */ .l3_banks = 4, @@ -260,6 +267,7 @@ static const struct gen_device_info gen_device_info_byt = { GEN7_FEATURES, .is_baytrail = true, .gt = 1, .num_slices = 1, .num_subslices = { 1, }, + .num_eu_per_subslice = 4, .num_thread_per_eu = 8, .l3_banks = 1, .has_llc = false, @@ -294,6 +302,7 @@ static const struct gen_device_info gen_device_info_hsw_gt1 = { HSW_FEATURES, .gt = 1, .num_slices = 1, .num_subslices = { 1, }, + .num_eu_per_subslice = 10, .num_thread_per_eu = 7, .l3_banks = 2, .max_vs_threads = 70, @@ -321,6 +330,7 @@ static const struct gen_device_info gen_device_info_hsw_gt2 = { HSW_FEATURES, .gt = 2, .num_slices = 1, .num_subslices = { 2, }, + .num_eu_per_subslice = 10, .num_thread_per_eu = 7, .l3_banks = 4, .max_vs_threads = 280, @@ -348,6 +358,7 @@ static const struct gen_device_info gen_device_info_hsw_gt3 = { HSW_FEATURES, .gt = 3, .num_slices = 2, .num_subslices = { 2, }, + .num_eu_per_subslice = 10, .num_thread_per_eu = 7, .l3_banks = 8, .max_vs_threads = 280, @@ -398,6 +409,7 @@ static const struct gen_device_info gen_device_info_bdw_gt1 = { .is_broadwell = true, .num_slices = 1, .num_subslices = { 2, }, + .num_eu_per_subslice = 
8, .num_thread_per_eu = 7, .l3_banks = 2, .max_cs_threads = 42, @@ -421,6 +433,7 @@ static const struct gen_device_info gen_device_info_bdw_gt2 = { .is_broadwell = true, .num_slices = 1, .num_subslices = { 3, }, + .num_eu_per_subslice = 8, .num_thread_per_eu = 7, .l3_banks = 4, .max_cs_threads = 56, @@ -444,6 +457,7 @@ static const struct gen_device_info gen_device_info_bdw_gt3 = { .is_broadwell = true, .num_slices = 2, .num_subslices = { 3, 3, }, + .num_eu_per_subslice = 8, .num_thread_per_eu = 7, .l3_banks = 8, .max_cs_threads = 56, @@ -468,6 +482,7 @@ static const struct gen_device_info gen_device_info_chv = { .has_integer_dword_mul = false, .num_slices = 1, .num_subslices = { 2, }, + .num_eu_per_subslice = 8, .num_thread_per_eu = 7, .l3_banks = 2, .max_vs_threa
[Mesa-dev] [PATCH v2 7/8] i965: perf: add support for new equation operators
Some equations of the CNL metrics started to use operators we haven't defined yet, just add those. Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_oa.py | 15 +++ 1 file changed, 15 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index 7931c825f08..06995a6bf40 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -125,6 +125,18 @@ def emit_umin(tmp_id, args): c("uint64_t tmp{0} = MIN({1}, {2});".format(tmp_id, args[1], args[0])) return tmp_id + 1 +def emit_lshft(tmp_id, args): +c("uint64_t tmp{0} = {1} << {2};".format(tmp_id, args[1], args[0])) +return tmp_id + 1 + +def emit_rshft(tmp_id, args): +c("uint64_t tmp{0} = {1} >> {2};".format(tmp_id, args[1], args[0])) +return tmp_id + 1 + +def emit_and(tmp_id, args): +c("uint64_t tmp{0} = {1} & {2};".format(tmp_id, args[1], args[0])) +return tmp_id + 1 + ops = {} # (n operands, emitter) ops["FADD"] = (2, emit_fadd) @@ -138,6 +150,9 @@ ops["UDIV"] = (2, emit_udiv) ops["UMUL"] = (2, emit_umul) ops["USUB"] = (2, emit_usub) ops["UMIN"] = (2, emit_umin) +ops["<<"] = (2, emit_lshft) +ops[">>"] = (2, emit_rshft) +ops["AND"] = (2, emit_and) def brkt(subexp): if " " in subexp: -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/8] intel: devinfo: store slice/subslice/eu masks
We want to store values coming from the kernel but as a first step, we can generate mask values out the numbers already stored in the gen_device_info masks. Signed-off-by: Lionel Landwerlin --- src/intel/dev/gen_device_info.c | 43 + src/intel/dev/gen_device_info.h | 39 - 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c index 26c2651f0ff..c1bdc997f2c 100644 --- a/src/intel/dev/gen_device_info.c +++ b/src/intel/dev/gen_device_info.c @@ -872,6 +872,47 @@ static const struct gen_device_info gen_device_info_icl_1x8 = { GEN11_FEATURES(1, 1, subslices(1), 6), }; +/* Generate slice/subslice/eu masks from number of + * slices/subslices/eu_per_subslices in the per generation/gt gen_device_info + * structure. + * + * These can be overridden with values reported by the kernel either from + * getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+ + * through the i915 query uapi. + */ +static void +fill_masks(struct gen_device_info *devinfo) +{ + devinfo->slice_masks = (1UL << devinfo->num_slices) - 1; + + /* Subslice masks */ + unsigned max_subslices = 0; + for (int s = 0; s < devinfo->num_slices; s++) + max_subslices = MAX2(devinfo->num_subslices[s], max_subslices); + devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8); + + for (int s = 0; s < devinfo->num_slices; s++) { + devinfo->subslice_masks[s * devinfo->subslice_slice_stride] = + (1UL << devinfo->num_subslices[s]) - 1; + } + + /* EU masks */ + devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8); + devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride; + + for (int s = 0; s < devinfo->num_slices; s++) { + for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) { + for (int b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) { +int subslice_offset = + s * devinfo->eu_slice_stride + ss * devinfo->eu_subslice_stride; + +devinfo->eu_masks[subslice_offset + b_eu] = + (((1UL << 
devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 0xff; + } + } + } +} + bool gen_get_device_info(int devid, struct gen_device_info *devinfo) { @@ -885,6 +926,8 @@ gen_get_device_info(int devid, struct gen_device_info *devinfo) return false; } + fill_masks(devinfo); + /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer: * * "Scratch Space per slice is computed based on 4 sub-slices. SW must diff --git a/src/intel/dev/gen_device_info.h b/src/intel/dev/gen_device_info.h index 17285ffed88..793ce094850 100644 --- a/src/intel/dev/gen_device_info.h +++ b/src/intel/dev/gen_device_info.h @@ -28,10 +28,16 @@ #include #include +#include "util/macros.h" + #ifdef __cplusplus extern "C" { #endif +#define GEN_DEVICE_MAX_SLICES (6) /* Maximum on gen10 */ +#define GEN_DEVICE_MAX_SUBSLICES(8) /* Maximum on gen11 */ +#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (10) /* Maximum on Haswell */ + /** * Intel hardware information and quirks */ @@ -112,7 +118,7 @@ struct gen_device_info /** * Number of subslices for each slice (used to be uniform until CNL). */ - unsigned num_subslices[3]; + unsigned num_subslices[GEN_DEVICE_MAX_SUBSLICES]; /** * Number of EU per subslice. @@ -124,6 +130,37 @@ struct gen_device_info */ unsigned num_thread_per_eu; + /** +* A bit mask of the slices available. +*/ + uint8_t slice_masks; + + /** +* An array of bit mask of the subslices available, use subslice_slice_stride +* to access this array. +*/ + uint8_t subslice_masks[GEN_DEVICE_MAX_SLICES * + DIV_ROUND_UP(GEN_DEVICE_MAX_SUBSLICES, 8)]; + + /** +* An array of bit mask of EUs available, use eu_slice_stride & +* eu_subslice_stride to access this array. +*/ + uint8_t eu_masks[GEN_DEVICE_MAX_SLICES * +GEN_DEVICE_MAX_SUBSLICES * +DIV_ROUND_UP(GEN_DEVICE_MAX_EUS_PER_SUBSLICE, 8)]; + + /** +* Stride to access subslice_masks[]. +*/ + uint16_t subslice_slice_stride; + + /** +* Strides to access eu_masks[]. 
+*/ + uint16_t eu_slice_stride; + uint16_t eu_subslice_stride; + unsigned l3_banks; unsigned max_vs_threads; /**< Maximum Vertex Shader threads */ unsigned max_tcs_threads; /**< Maximum Hull Shader threads */ -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 0/8] i965: add support for performance queries on CNL
Hi all, Here is a v2 to enable perf queries on CNL. It moves some of the data stored into the brw_context into gen_device_info. This makes it reusable by other APIs (Vulkan) if they develop perf query capabilities in the future. One of the patches is quite big, you can look at this series on my github : https://github.com/djdeath/mesa/tree/wip/djdeath/query-topology Thanks, Lionel Landwerlin (8): intel: devinfo: store number of EUs per subslice intel: devinfo: store slice/subslice/eu masks drm-uapi: bump headers intel: devinfo: meson: include drm uapi intel: devinfo: add helper functions to fill fusing masks values i965: perf: query topology i965: perf: add support for new equation operators i965: add performance query support on CNL include/drm-uapi/i915_drm.h | 146 +- src/intel/dev/gen_device_info.c | 207 +- src/intel/dev/gen_device_info.h |55 +- src/intel/dev/meson.build | 2 +- src/mesa/drivers/dri/i965/Makefile.am | 1 + src/mesa/drivers/dri/i965/Makefile.sources| 4 +- src/mesa/drivers/dri/i965/brw_oa.py |15 + src/mesa/drivers/dri/i965/brw_oa_cnl.xml | 10410 src/mesa/drivers/dri/i965/brw_performance_query.c | 185 +- src/mesa/drivers/dri/i965/meson.build | 2 +- 10 files changed, 10949 insertions(+), 78 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_oa_cnl.xml -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 6/8] i965: perf: query topology
With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. We introduce a new uAPI in the kernel driver to report exactly what part of the GPU are fused and require this to be available on Gen10+. Prior generations can continue to rely on GETPARAM on older kernels. This patch is quite a lot of code because we have to support lots of different kernel versions, ranging from not providing any information (for Haswell on 4.13 through 4.17), to being able to query through GETPARAM (for gen8/9 on 4.13 through 4.17), to finally requiring 4.17 for Gen10+. This change stores topology information in a unified way on brw_context.topology from the various kernel APIs. And then generates the appropriate values for the equations from that unified topology. v2: Move slice/subslice masks fields to gen_device_info (Rafael) Signed-off-by: Lionel Landwerlin Acked-by: Rafael Antognolli --- src/mesa/drivers/dri/i965/brw_performance_query.c | 182 +- 1 file changed, 111 insertions(+), 71 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 13eff31ee61..3b52db6e74e 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -1921,6 +1921,101 @@ init_oa_configs(struct brw_context *brw) } } +static bool +query_topology(struct brw_context *brw) +{ + __DRIscreen *screen = brw->screen->driScrnPriv; + struct drm_i915_query_item item = { + .query_id = DRM_I915_QUERY_TOPOLOGY_INFO, + }; + struct drm_i915_query query = { + .num_items = 1, + .items_ptr = (uintptr_t) &item, + }; + + if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query)) + return false; + + struct drm_i915_query_topology_info *topo_info = + (struct drm_i915_query_topology_info *) calloc(1, item.length); + item.data_ptr = (uintptr_t) topo_info; + + if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, 
&query) || + item.length <= 0) + return false; + + gen_device_info_update_from_topology(&brw->screen->devinfo, +topo_info); + + free(topo_info); + + return true; +} + +static bool +getparam_topology(struct brw_context *brw) +{ + __DRIscreen *screen = brw->screen->driScrnPriv; + drm_i915_getparam_t gp; + int ret; + + int slice_mask = 0; + gp.param = I915_PARAM_SLICE_MASK; + gp.value = &slice_mask; + ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) + return false; + + int subslice_mask = 0; + gp.param = I915_PARAM_SUBSLICE_MASK; + gp.value = &subslice_mask; + ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) + return false; + + gen_device_info_update_from_masks(&brw->screen->devinfo, + slice_mask, + subslice_mask, + brw->screen->eu_total); + + return true; +} + +static void +compute_topology_builtins(struct brw_context *brw) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + + brw->perfquery.sys_vars.slice_mask = devinfo->slice_masks; + brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices; + + for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) { + brw->perfquery.sys_vars.n_eu_sub_slices += + __builtin_popcount(devinfo->subslice_masks[i]); + } + + for (int i = 0; i < sizeof(devinfo->eu_masks); i++) + brw->perfquery.sys_vars.n_eus += __builtin_popcount(devinfo->eu_masks[i]); + + brw->perfquery.sys_vars.eu_threads_count = + brw->perfquery.sys_vars.n_eus * devinfo->num_thread_per_eu; + + /* At the moment the subslice mask builtin has groups of 3bits for each +* slice. +* +* Ideally equations would be updated to have a slice/subslice query +* function/operator. 
+*/ + brw->perfquery.sys_vars.subslice_mask = 0; + for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) { + for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) { + if (devinfo->subslice_masks[s * devinfo->subslice_slice_stride + + ss / 8] & (1UL << (ss % 8))) +brw->perfquery.sys_vars.subslice_mask |= 1UL << (s * 3 + ss); + } + } +} + static bool init_oa_sys_vars(struct brw_context *brw) { @@ -1934,83 +2029,28 @@ init_oa_sys_vars(struct brw_context *brw) if (!read_sysfs_drm_device_file_uint64(brw, "gt_max_freq_mhz", &max_freq_mhz)) return false; - brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 100; - brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 100; - brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency; - - brw->perfquery.sys_vars.revision = intel_device_get_revision(screen->fd); - brw->perfquery.sys_vars.n_eu_slices = devinfo
Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping
Quoting Nanley Chery (2018-03-14 17:14:15) > On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote: > > Rename the (un)map_gtt functions to (un)map_map (map by > > returning a map) and add new functions (un)map_tiled_memcpy that > > return a shadow buffer populated with the intel_tiled_memcpy > > functions. > > > > Tiling/detiling with the cpu will be the only way to handle Yf/Ys > > tiling, when support is added for those formats. > > > > v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson) > > > > v3: Add units to parameter names of tile_extents (Nanley Chery) > > Use _mesa_align_malloc for the shadow copy (Nanley) > > Continue using gtt maps on gen4 (Nanley) > > --- > > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 > > --- > > 1 file changed, 86 insertions(+), 8 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > index c6213b21629..fba17bf5b7b 100644 > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > @@ -31,6 +31,7 @@ > > #include "intel_image.h" > > #include "intel_mipmap_tree.h" > > #include "intel_tex.h" > > +#include "intel_tiled_memcpy.h" > > #include "intel_blit.h" > > #include "intel_fbo.h" > > > > @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree > > *mt) > > } > > > > static void > > -intel_miptree_map_gtt(struct brw_context *brw, > > - struct intel_mipmap_tree *mt, > > - struct intel_miptree_map *map, > > - unsigned int level, unsigned int slice) > > +intel_miptree_map_map(struct brw_context *brw, > > + struct intel_mipmap_tree *mt, > > + struct intel_miptree_map *map, > > + unsigned int level, unsigned int slice) > > { > > unsigned int bw, bh; > > void *base; > > @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw, > > } > > > > static void > > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt) > > 
+intel_miptree_unmap_map(struct intel_mipmap_tree *mt) > > { > > intel_miptree_unmap_raw(mt); > > } > > > > +/* Compute extent parameters for use with tiled_memcpy functions. > > + * xs are in units of bytes and ys are in units of strides. */ > > +static inline void > > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map, > > + unsigned int level, unsigned int slice, unsigned int *x1_B, > > + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el) > > +{ > > + unsigned int block_width, block_height; > > + unsigned int x0_el, y0_el; > > + > > + _mesa_get_format_block_size(mt->format, &block_width, &block_height); > > + > > + assert(map->x % block_width == 0); > > + assert(map->y % block_height == 0); > > + > > + intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); > > + *x1_B = (map->x / block_width + x0_el) * mt->cpp; > > + *y1_el = map->y / block_height + y0_el; > > + *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp; > > + *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; > > +} > > + > > +static void > > +intel_miptree_map_tiled_memcpy(struct brw_context *brw, > > + struct intel_mipmap_tree *mt, > > + struct intel_miptree_map *map, > > + unsigned int level, unsigned int slice) > > +{ > > + unsigned int x1, x2, y1, y2; > > + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); > > + map->stride = _mesa_format_row_stride(mt->format, map->w); > > + map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), > > 16); > > + > > + assert(map->ptr); > > + > > + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { > > It looks like we'll generate extra copies using this function, but only > in a few corner cases. 
I think the following places should be using the > INVALIDATE flag, but aren't: > * _mesa_store_cleartexsubimage > * generate_mipmap_uncompressed > > > + char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); > > + src += mt->offset; > > + > > It seems possible that the buffer object had a WC memory type during > rendering. In that case, we need an sfence here right? > > This stuff is pretty new to me, so perhaps others would like to chime > in. > > > + tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride, > > + mt->surf.row_pitch, brw->has_swizzling, > > mt->surf.tiling, > > + memcpy); > > + > > + intel_miptree_unmap_raw(mt); > > + } > > +} > > + > > +static void > > +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw, > > + struct intel_mipmap_tree *mt, > > + struct intel_miptree_map *map, > > + unsigned int lev
Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping
On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote: > Rename the (un)map_gtt functions to (un)map_map (map by > returning a map) and add new functions (un)map_tiled_memcpy that > return a shadow buffer populated with the intel_tiled_memcpy > functions. > > Tiling/detiling with the cpu will be the only way to handle Yf/Ys > tiling, when support is added for those formats. > > v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson) > > v3: Add units to parameter names of tile_extents (Nanley Chery) > Use _mesa_align_malloc for the shadow copy (Nanley) > Continue using gtt maps on gen4 (Nanley) > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 > --- > 1 file changed, 86 insertions(+), 8 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index c6213b21629..fba17bf5b7b 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -31,6 +31,7 @@ > #include "intel_image.h" > #include "intel_mipmap_tree.h" > #include "intel_tex.h" > +#include "intel_tiled_memcpy.h" > #include "intel_blit.h" > #include "intel_fbo.h" > > @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt) > } > > static void > -intel_miptree_map_gtt(struct brw_context *brw, > - struct intel_mipmap_tree *mt, > - struct intel_miptree_map *map, > - unsigned int level, unsigned int slice) > +intel_miptree_map_map(struct brw_context *brw, > + struct intel_mipmap_tree *mt, > + struct intel_miptree_map *map, > + unsigned int level, unsigned int slice) > { > unsigned int bw, bh; > void *base; > @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw, > } > > static void > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt) > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt) > { > intel_miptree_unmap_raw(mt); > } > > +/* Compute extent parameters for use with tiled_memcpy functions. 
> + * xs are in units of bytes and ys are in units of strides. */ > +static inline void > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map, > + unsigned int level, unsigned int slice, unsigned int *x1_B, > + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el) > +{ > + unsigned int block_width, block_height; > + unsigned int x0_el, y0_el; > + > + _mesa_get_format_block_size(mt->format, &block_width, &block_height); > + > + assert(map->x % block_width == 0); > + assert(map->y % block_height == 0); > + > + intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); > + *x1_B = (map->x / block_width + x0_el) * mt->cpp; > + *y1_el = map->y / block_height + y0_el; > + *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp; > + *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; > +} > + > +static void > +intel_miptree_map_tiled_memcpy(struct brw_context *brw, > + struct intel_mipmap_tree *mt, > + struct intel_miptree_map *map, > + unsigned int level, unsigned int slice) > +{ > + unsigned int x1, x2, y1, y2; > + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); > + map->stride = _mesa_format_row_stride(mt->format, map->w); > + map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), 16); > + > + assert(map->ptr); > + > + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { It looks like we'll generate extra copies using this function, but only in a few corner cases. I think the following places should be using the INVALIDATE flag, but aren't: * _mesa_store_cleartexsubimage * generate_mipmap_uncompressed > + char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); > + src += mt->offset; > + It seems possible that the buffer object had a WC memory type during rendering. In that case, we need an sfence here right? This stuff is pretty new to me, so perhaps others would like to chime in. 
> + tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride, > + mt->surf.row_pitch, brw->has_swizzling, > mt->surf.tiling, > + memcpy); > + > + intel_miptree_unmap_raw(mt); > + } > +} > + > +static void > +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw, > + struct intel_mipmap_tree *mt, > + struct intel_miptree_map *map, > + unsigned int level, > + unsigned int slice) > +{ > + if (map->mode & GL_MAP_WRITE_BIT) { > + unsigned int x1, x2, y1, y2; > + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); > + > + char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); > + dst
Re: [Mesa-dev] [PATCH] meson: require amdgpu >= 2.4.91
For the series: Reviewed-by: Marek Olšák You can push the series now if you want to. That would be simplest. Marek On Wed, Mar 14, 2018 at 12:10 PM, Dylan Baker wrote: > Signed-off-by: Dylan Baker > > --- > > Marek, can you either squash this into your other patch or push this along > with > that? > > meson.build | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/meson.build b/meson.build > index 51b470253f5..e21ac74a1e3 100644 > --- a/meson.build > +++ b/meson.build > @@ -1036,7 +1036,7 @@ dep_libdrm_nouveau = [] > dep_libdrm_etnaviv = [] > dep_libdrm_freedreno = [] > if with_amd_vk or with_gallium_radeonsi > - dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.90') > + dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.91') > endif > if (with_gallium_radeonsi or with_dri_r100 or with_dri_r200 or > with_gallium_r300 or with_gallium_r600) > -- > 2.16.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] autotools+dri3: allow building against older xcb (v4)
Hi, On Wednesday, 14 March 2018 15:51:03 CET Brian Paul wrote: > FWIW, I'd like to see this sooner rather than later too. I spent > several hours yesterday trying to update our build script to > build/install XCB 1.13 on Fedora, Ubuntu, etc. without totally succeeding. I did just rebuild the 1.13 srpm from koji.fedora... on fedora27. I have put those I have into my freedesktop home directory: /home/frohlich/xcb-1.13-rpms Not that I want to maintain them, but to share what at this current minute helps for me. IMO not just requiring the most recent version of such a system library would be a very good thing! best Mathias ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] meson+dri3: allow building against older xcb (v3)
Quoting Dylan Baker (2018-03-14 09:42:36) > Quoting Rob Clark (2018-03-14 06:04:58) > > Similar to previous patch, make xcb 1.13 optional. > > > > Signed-off-by: Rob Clark > > --- > > meson.build | 11 --- > > 1 file changed, 8 insertions(+), 3 deletions(-) > > > > diff --git a/meson.build b/meson.build > > index c201644c372..0e2f73e67b6 100644 > > --- a/meson.build > > +++ b/meson.build > > @@ -1235,9 +1235,14 @@ if with_platform_x11 > > dep_xcb_dri2 = dependency('xcb-dri2', version : '>= 1.8') > > > > if with_dri3 > > - pre_args += ['-DHAVE_DRI3', '-DHAVE_DRI3_MODIFIERS'] > > - dep_xcb_dri3 = dependency('xcb-dri3', version : '>= 1.13') > > - dep_xcb_present = dependency('xcb-present', version: '>= 1.13') > > + pre_args += '-DHAVE_DRI3' > > + dep_xcb_dri3 = dependency('xcb-dri3') > > + dep_xcb_present = dependency('xcb-present') > > + # until xcb-dri3 has been around long enough to make a > > hard-dependency: > > + if (dep_xcb_dri3.version().version_compare('>= 1.13') and > > + dep_xcb_present.version().version_compare('>= 1.13')) > > +pre_args += '-DHAVE_DRI3_MODIFIERS' > > + endif > >dep_xcb_sync = dependency('xcb-sync') > >dep_xshmfence = dependency('xshmfence', version : '>= 1.1') > > endif > > -- > > 2.14.3 > > > > I don't care about building against old versions of xcb either, but this is > very > minimally intrusive so I don't have a problem with it, > > Reviewed-by: Dylan Baker Oh, just this patch, I'm not qualified to review the first one :) Dylan signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] meson+dri3: allow building against older xcb (v3)
Quoting Rob Clark (2018-03-14 06:04:58) > Similar to previous patch, make xcb 1.13 optional. > > Signed-off-by: Rob Clark > --- > meson.build | 11 --- > 1 file changed, 8 insertions(+), 3 deletions(-) > > diff --git a/meson.build b/meson.build > index c201644c372..0e2f73e67b6 100644 > --- a/meson.build > +++ b/meson.build > @@ -1235,9 +1235,14 @@ if with_platform_x11 > dep_xcb_dri2 = dependency('xcb-dri2', version : '>= 1.8') > > if with_dri3 > - pre_args += ['-DHAVE_DRI3', '-DHAVE_DRI3_MODIFIERS'] > - dep_xcb_dri3 = dependency('xcb-dri3', version : '>= 1.13') > - dep_xcb_present = dependency('xcb-present', version: '>= 1.13') > + pre_args += '-DHAVE_DRI3' > + dep_xcb_dri3 = dependency('xcb-dri3') > + dep_xcb_present = dependency('xcb-present') > + # until xcb-dri3 has been around long enough to make a hard-dependency: > + if (dep_xcb_dri3.version().version_compare('>= 1.13') and > + dep_xcb_present.version().version_compare('>= 1.13')) > +pre_args += '-DHAVE_DRI3_MODIFIERS' > + endif >dep_xcb_sync = dependency('xcb-sync') >dep_xshmfence = dependency('xshmfence', version : '>= 1.1') > endif > -- > 2.14.3 > I don't care about building against old versions of xcb either, but this is very minimally intrusive so I don't have a problem with it, Reviewed-by: Dylan Baker signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105510] Unable to build swrAVX@sha/rasterizer_core_threads.cpp
https://bugs.freedesktop.org/show_bug.cgi?id=105510 Clayton Craft changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #1 from Clayton Craft --- The offending commit was reverted. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] configure.ac: blacklist libdrm 2.4.90
On 14 March 2018 at 01:03, Marek Olšák wrote: > From: Marek Olšák > > Cc: 18.0 17.3 17.2 > --- > configure.ac | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/configure.ac b/configure.ac > index 621dc32..e29ce68 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -2601,20 +2601,27 @@ if test -n "$with_gallium_drivers"; then > radeon_llvm_check $LLVM_REQUIRED_R600 "r600" > > llvm_add_component "asmparser" "r600" > llvm_add_component "bitreader" "r600" > fi > ;; > xradeonsi) > HAVE_GALLIUM_RADEONSI=yes > PKG_CHECK_MODULES([RADEON], [libdrm >= $LIBDRM_RADEON_REQUIRED > libdrm_radeon >= $LIBDRM_RADEON_REQUIRED]) > PKG_CHECK_MODULES([AMDGPU], [libdrm >= $LIBDRM_AMDGPU_REQUIRED > libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED]) > + > +# Blacklist libdrm_amdgpu 2.4.90 because it breaks older radeonsi > +libdrm_version=`pkg-config libdrm_amdgpu --modversion` > +if test "x$libdrm_version" = x2.4.90; then > +AC_MSG_ERROR([radeonsi can't use libdrm 2.4.90 due to a > compatibility issue. Use a newer or older version.]) > +fi > + Please include a reference next to the check. Pretty much anything will do - fd.o/other bug report, ML thread, failing app, other. With that the series is: Reviewed-by: Emil Velikov Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105396] tc compatible htile sets depth of htiles of discarded fragments to 1.0
https://bugs.freedesktop.org/show_bug.cgi?id=105396 --- Comment #2 from James Legg --- https://patchwork.freedesktop.org/patch/208935/ fixes it for me on my RX 480, but I haven't had any reviews on that patch yet and I'm not sure if I'm heading in the right direction. It would also be good to test this on other GPUs including Vega. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] meson: radeonsi cannot be built with drm 2.4.90
Signed-off-by: Dylan Baker Cc: 18.0 17.3 17.2 Cc: Emil Velikov --- Emil, I don't know what the appropriate thing to do is here, this is the meson equivalent of Marek's first patch, but this doesn't make any sense on master. meson.build | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 7ecd1fd67de..cb43c82c908 100644 --- a/meson.build +++ b/meson.build @@ -979,7 +979,9 @@ dep_libdrm_nouveau = [] dep_libdrm_etnaviv = [] dep_libdrm_freedreno = [] if with_amd_vk or with_gallium_radeonsi - dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.89') + dep_libdrm_amdgpu = dependency( +'libdrm_amdgpu', version : ['>= 2.4.89', '!= 2.4.90'] + ) endif if (with_gallium_radeonsi or with_dri_r100 or with_dri_r200 or with_gallium_r300 or with_gallium_r600) -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] meson: require amdgpu >= 2.4.91
Signed-off-by: Dylan Baker --- Marek, can you either squash this into your other patch or push this along with that? meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 51b470253f5..e21ac74a1e3 100644 --- a/meson.build +++ b/meson.build @@ -1036,7 +1036,7 @@ dep_libdrm_nouveau = [] dep_libdrm_etnaviv = [] dep_libdrm_freedreno = [] if with_amd_vk or with_gallium_radeonsi - dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.90') + dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.91') endif if (with_gallium_radeonsi or with_dri_r100 or with_dri_r200 or with_gallium_r300 or with_gallium_r600) -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] autotools+dri3: allow building against older xcb (v4)
On 14 March 2018 at 14:51, Brian Paul wrote: > On 03/14/2018 08:40 AM, Rob Clark wrote: >> >> On Wed, Mar 14, 2018 at 10:29 AM, Emil Velikov >> wrote: >>> >>> On 14 March 2018 at 13:04, Rob Clark wrote: From: Dave Airlie I'm not sure everyone wants to be updating their dri3 in a forced march setting, this allows a nicer approach, esp when you want to build on distro that aren't brand new. I'm sure there are plenty of ways this patch could be cleaner, and I've also not built it against an updated dri3. For meson I've just left it alone, since if you are using meson you probably don't mind xcb updates, and if you are using meson you can fix this better than me. v3: just don't put a version in for dri3/present without modifiers, should allow building with 1.11 as well v4: small fix to meson build (feel free to supply meson followups) >>> IIRC Matt seemed also on board with making the new xcb a hard >>> requirement. >>> >>> I don't know the exact usecase he was thinking, yet gut feeling >>> suggests that my earlier suggestion [1] should work for everyone - >>> Dave, Matt, Marek, etc. >>> If the concern is writing the code - I can help ;-) >>> >> >> I care *significantly* less about the build against 1.12, run against >> 1.13 case than I do about getting back to the point where I don't have >> to carry around these patches to build mesa (otherwise it is just a >> matter of time before I accidentally push them just because I needed >> them to test whatever it was that I was intending to push ;-) >> >> If someone wants to build on top of this and make something more >> fancy, by all means. But I really would like to push something that >> removes the 1.13 dependency like today(ish), whether that be this >> patchset or reverting the patches that added the 1.13 dependency and >> trying again later. > > > FWIW, I'd like to see this sooner rather than later too. 
I spent several > hours yesterday trying to update our build script to build/install XCB 1.13 > on Fedora, Ubuntu, etc. without totally succeeding. > I could have saved you some time, if the script is available somewhere ;-) FTR the oibaf repo has the Ubuntu bits.. Up-to-date distros like Gentoo, Arch are fine as well - doubt they are your target audience, though. -Emil [1] https://launchpad.net/~oibaf/+archive/ubuntu/graphics-drivers ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105510] Unable to build swrAVX@sha/rasterizer_core_threads.cpp
https://bugs.freedesktop.org/show_bug.cgi?id=105510 Bug ID: 105510 Summary: Unable to build swrAVX@sha/rasterizer_core_threads.cpp Product: Mesa Version: git Hardware: Other OS: All Status: NEW Keywords: regression Severity: normal Priority: medium Component: Drivers/Gallium/swr Assignee: mesa-dev@lists.freedesktop.org Reporter: clayton.a.cr...@intel.com QA Contact: mesa-dev@lists.freedesktop.org I have bisected this failure to the following commit: commit de0d10db93d85de79c7b4451c4851ace2976f8f4 Author: Apple SWE Date: Tue Mar 13 18:24:26 2018 -0700 Add processor topology calculation implementation for Darwin/OSX targets. Full output from build failure: 23:00:35 FAILED: src/gallium/drivers/swr/swrAVX@sha/rasterizer_core_threads.cpp.o 23:00:35 ccache g++ -Isrc/gallium/drivers/swr/swrAVX@sha -Isrc/gallium/drivers/swr -I../src/gallium/drivers/swr -Isrc/gallium/drivers/swr/rasterizer -I../src/gallium/drivers/swr/rasterizer -I../src/gallium/drivers/swr/rasterizer/archrast -Isrc/gallium/drivers/swr/rasterizer/jitter -I../src/gallium/drivers/swr/rasterizer/jitter -Isrc/gallium/drivers/swr/rasterizer/core -I../src/gallium/drivers/swr/rasterizer/core -Isrc/gallium/drivers/swr/rasterizer/codegen -I../src/gallium/drivers/swr/rasterizer/codegen -Isrc/gallium/drivers/swr/rasterizer/core/backends -I/usr/lib/llvm-4.0/include -fdiagnostics-color=always -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Wnon-virtual-dtor -std=c++11 -O2 -g '-DVERSION="18.1.0-devel"' -DPACKAGE_VERSION=VERSION '-DPACKAGE_BUGREPORT="https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa";' -DGLX_USE_TLS -DHAVE_ST_VDPAU -DENABLE_ST_OMX_BELLAGIO -DHAVE_X11_PLATFORM -DGLX_INDIRECT_RENDERING -DGLX_DIRECT_RENDERING -DGLX_USE_DRM -DHAVE_DRM_PLATFORM -DHAVE_SURFACELESS_PLATFORM -DENABLE_SHADER_CACHE -DHAVE___BUILTIN_BSWAP32 -DHAVE___BUILTIN_BSWAP64 -DHAVE___BUILTIN_CLZ -DHAVE___BUILTIN_CLZLL -DHAVE___BUILTIN_CTZ -DHAVE___BUILTIN_EXPECT -DHAVE___BUILTIN_FFS -DHAVE___BUILTIN_FFSLL -DHAVE___BUILTIN_POPCOUNT 
-DHAVE___BUILTIN_POPCOUNTLL -DHAVE___BUILTIN_UNREACHABLE -DHAVE_FUNC_ATTRIBUTE_CONST -DHAVE_FUNC_ATTRIBUTE_FLATTEN -DHAVE_FUNC_ATTRIBUTE_MALLOC -DHAVE_FUNC_ATTRIBUTE_PURE -DHAVE_FUNC_ATTRIBUTE_UNUSED -DHAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT -DHAVE_FUNC_ATTRIBUTE_WEAK -DHAVE_FUNC_ATTRIBUTE_FORMAT -DHAVE_FUNC_ATTRIBUTE_PACKED -DHAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL -DHAVE_FUNC_ATTRIBUTE_VISIBILITY -DHAVE_FUNC_ATTRIBUTE_ALIAS -DHAVE_FUNC_ATTRIBUTE_NORETURN -DUSE_SSE41 -DUSE_GCC_ATOMIC_BUILTINS -DUSE_X86_64_ASM -DMAJOR_IN_SYSMACROS -DHAVE_SYS_SYSCTL_H -DHAVE_LINUX_FUTEX_H -DHAVE_STRTOF -DHAVE_MKOSTEMP -DHAVE_POSIX_MEMALIGN -DHAVE_TIMESPEC_GET -DHAVE_MEMFD_CREATE -DHAVE_STRTOD_L -DHAVE_DLADDR -DHAVE_DL_ITERATE_PHDR -DHAVE_LIBDRM -DHAVE_ZLIB -DHAVE_PTHREAD -DHAVE_LLVM=0x0400 -DMESA_LLVM_VERSION_PATCH=1 -DHAVE_WAYLAND_PLATFORM -DWL_HIDE_DEPRECATED -DHAVE_DRI3 -Wall -fno-math-errno -fno-trapping-math -Wno-non-virtual-dtor -fPIC -D__STDC_CONSTANT_MACROS -D_GNU_SOURCE -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -pthread -fvisibility=hidden -fno-strict-aliasing -mavx -DKNOB_ARCH=KNOB_ARCH_AVX -MD -MQ 'src/gallium/drivers/swr/swrAVX@sha/rasterizer_core_threads.cpp.o' -MF 'src/gallium/drivers/swr/swrAVX@sha/rasterizer_core_threads.cpp.o.d' -o 'src/gallium/drivers/swr/swrAVX@sha/rasterizer_core_threads.cpp.o' -c ../src/gallium/drivers/swr/rasterizer/core/threads.cpp 23:00:35 ../src/gallium/drivers/swr/rasterizer/core/threads.cpp:268:18: error: ‘sysctlbyname’ was not declared in this scope 23:00:35 int result = sysctlbyname("hw.packages", &value, &size, NULL, 0); 23:00:35 ^~~~ 23:00:35 ../src/gallium/drivers/swr/rasterizer/core/threads.cpp:268:18: note: suggested alternative: ‘ttyname’ 23:00:35 int result = sysctlbyname("hw.packages", &value, &size, NULL, 0); 23:00:35 ^~~~ 23:00:35 ttyname 23:00:35 In file included from ../src/gallium/drivers/swr/rasterizer/common/os.h:267:0, 23:00:35 from ../src/gallium/drivers/swr/rasterizer/core/threads.cpp:44: 23:00:35 
../src/gallium/drivers/swr/rasterizer/common/swr_assert.h:65:26: error: expected unqualified-id before ‘do’ 23:00:35 #define _SWR_MACRO_START do { 23:00:35 ^ 23:00:35 ../src/gallium/drivers/swr/rasterizer/common/swr_assert.h:131:5: note: in expansion of macro ‘_SWR_MACRO_START’ 23:00:35 _SWR_MACRO_START \ 23:00:35 ^~~~ 23:00:35 ../src/gallium/drivers/swr/rasterizer/common/swr_assert.h:151:41: note: in expansion of macro ‘_SWR_ASSERT’ 23:00:35 #define SWR_ASSERT(e, ...) _SWR_ASSERT(true, e, ##__VA_ARGS__) 23:00:35 ^~~ 23:00:35 ../src/gallium/drivers/swr/rasterizer/core/threads.cpp:269:5: note: in expansion of macro ‘SWR_ASSERT’ 23:00:35 SWR_
Re: [Mesa-dev] [PATCH 2/2] fixup! dri3: allow building against older xcb (v3)
Quoting Dylan Baker (2018-03-13 19:45:37) > Wrap it in parens and it can span multiple lines > > On March 13, 2018 5:40:15 PM PDT, Rob Clark wrote: > > On Tue, Mar 13, 2018 at 7:47 PM, Rob Clark wrote: > On Tue, Mar 13, 2018 at 7:27 PM, Rob Clark > wrote: > On Tue, Mar 13, 2018 at 7:10 PM, Dylan Baker > wrote: > Quoting Rob Clark (2018-03-13 16:04:00) > --- > I'm a bit unsure about the xcb-present version > dependency, as that was > added in a different commit. OTOH I guess Dave is > building vulkan with > his patch so it is perhaps not a built-time dependency. > > meson.build | 11 --- > 1 file changed, 8 insertions(+), 3 deletions(-) > > diff --git a/meson.build b/meson.build > index c201644c372..30f1919e6f5 100644 > --- a/meson.build > +++ b/meson.build > @@ -1235,9 +1235,14 @@ if with_platform_x11 > dep_xcb_dri2 = dependency('xcb-dri2', version : '>= > 1.8') > > if with_dri3 > - pre_args += ['-DHAVE_DRI3', > '-DHAVE_DRI3_MODIFIERS'] > - dep_xcb_dri3 = dependency('xcb-dri3', version : > '>= 1.13') > - dep_xcb_present = dependency('xcb-present', > version: '>= 1.13') > + pre_args += '-DHAVE_DRI3' > + dep_xcb_dri3 = dependency('xcb-dri3') > + dep_xcb_present = dependency('xcb-present') > + # until xcb-dri3 has been around long enough to > make a hard-dependency: > + dep_xcb_dri3_modifiers = dependency('xcb-dri3', > version : '>= 1.13', required : false) > + if dep_xcb_dri3_modifiers.found() > > I think you could simplify this by doing: > > if dep_xcb_dri3.version().version_compare('>= 1.13') > > ahh, yeah, and I guess that will get rid of the confusing error > msg > about xcb-dri3 1.13 not found.. > > > > Or should we be checking for xcb_dri3 and xcb_present >= > 1.13? > > I'm not entirely sure why we were checking for xcb-present >= > 1.13.. > if that is actually a build time requirement then I think Dave's > initial patch needs some more ifdef.. (but otoh, if it was, I > guess > he would have noticed.) 
> > Anyways, I did a build w/ anv+radv enabled with xcb-present == > 1.12 > (and xcb-dri3 1.12).. and > 61309c2a727d52d543207d6ae79fcb3e68b5cff3 > looks like it just cares about >= 1.12 of both of those > (although not > sure if it is a compile time dependency). > > So *possibly* for both meson and autotools we should require > 1.12, and > optionally 1.13 for HAVE_DRI3_MODIFIERS? > > > so mystery solved, Dave #ifdef'd out the present dependencies too ;-) > > so this is what I end up with: > > @@ -1235,9 +1235,14 @@ if with_platform_x11 > dep_xcb_dri2 = dependency('xcb-dri2', version : '>= 1.8') > > if with_dri3 > - pre_args += ['-DHAVE_DRI3', '-DHAVE_DRI3_MODIFIERS'] > - dep_xcb_dri3 = dependency('xcb-dri3', version : '>= 1.13') > - dep_xcb_present = dependency('xcb-present', version: '>= > 1.13') > + pre_args += '-DHAVE_DRI3' > + dep_xcb_dri3 = dependency('xcb-dri3') > + dep_xcb_present = dependency('xcb-present') > + # until xcb-dri3 has been around long enough to make a > hard-dependency: > + if dep_xcb_dri3.version().version_compare('>= 1.13') and > + dep_xcb_present.version().version_compare('>= 1.13') Sorry, I was replying form mobile last night, if (dep_xcb_dri3.version().version_compare('>= 1.13') and dep_xcb_present.version().version_compare('>= 1.13')) will work. Meson's recursive descent parser sometimes leaves something to be desired. > > hmm, annoyingly enough I found that on rawhide (meson 0.45.0), I seem > to need the entire if statement on a single line, instead of split in > two like this. > > BR, > -R > > +pre_args += '-DHAVE_DRI3_MODIFIERS' > + endif > dep_xcb_sync = dependency('xcb-sync') > dep_xshmfence = dependency('xshmfence', version : '>= 1.1') > endif > signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC] Mesa release improvements - Feature and Stable releases
On 14 March 2018 at 11:20, Andres Gomez wrote: > Hi, > > On Mon, 2018-03-12 at 18:02 +, Emil Velikov wrote: >> Hi Andres, >> >> On 12 March 2018 at 15:57, Andres Gomez wrote: >> > > > [...] > >> > >> > 18.1 example: >> > >> >1. Create a Metabug for the 18.1 branch point. >> >2. Announce the Metabug in mesa-dev and give 1 week (?) for developers >> > to complete their features. Advice to block the Metabug with other >> > feature bugs. >> >3. Developers create bugs with the WIP features they want to include in >> > 18.1 and block the Metabug. >> >4. After 1 week, check the status >> >* If there are no blockers, close the Metabug and create the 18.1 >> > branch point. >> >* If there are blockers; coordinate with the developers of the >> > blockers and decide whether to give a bit more of margin if the >> > feature is almost complete or just remove the blocking bugs >> > leaving the WIP features out, close the Metabug and create the >> > 18.1 branch point. >> >5. Release 18.1-0-rc1. >> >6. Create a Metabug to track the status of the final 18.1.0 release. >> >7. Block this Metabug with regressions found from 18.1.0-rcX. >> >8. Once we reach stability, close the Metabug and announce the final >> > release of 18.1.0. >> > >> >> I might sound a bit negative, yet I'm not sure what this brings us. >> Can you please elaborate? >> >> The original goal is to have the time based releases, as opposed to >> feature ones. >> That was reiterated by developers not too long ago. > > Ugh! > > I had very similar comments from Juan, so I may have explained myself > very badly ... > Guessing that I might have read more than what was said :-\ >> So far, there has been an announcement email 2-4 weeks before the >> branch point, aiming to: >> - remind, and >> - seek feedback about required features >> >> The email was also followed by weekly ping/reminder. >> >> IIRC suggestions and requests that are made in timely fashion* have >> always been accepted. 
>> If we're adopt the above approach, this will: >> - lead to noticeable delays in the branch point, which combined with >> - the current delays getting the blocking bugs fixed. equals >> - even greater delays and less time based releases >> >> Furthermore I'm a bit worried that this might have negative impact on >> developers: >> I don't know any instances, yet some developers may put extra pressure >> on themselves trying to get 'too many' features merged. Leading to >> stress, burn out and others. >> >> >> Perhaps we can somehow utilise your suggestion while ensuring that my >> grim 'predictions' do not come true? > > My suggestion is not to change the paradigm (time based vs feature > based releases) but rather to have better visibility of how the time > based feature releases are done. > > In other words, I'm not expecting to delay the time of the branchpoint. > I still believe we can have tiny flexibility for features that are just > about to land. I also believe this is the current way we are working, > isn't it? > > The proposal only intends to have a central point (a Metabug) in which > to track the status of the branch point rather than just in several > mails and in multiple pings which may happen by different ways (mail, > IRC, ... ?). > > And the same for tracking the final release. > > WDYT? Is this too complicated or time consuming for the release manager > at the given time? Do you think it would be useful? > Just double-checking: I would suspect you're not suggesting removing the existing email/poke scheme? Providing another means to devs to track/handle things is good IMHO. Whether developers will like it is up-to them. Everyone, your input is appreciated! I'm slightly worried that it might cause extra confusion. 
Some crude examples follow: - I don't use bugzilla/etc to track my feature work - most teams - Do I open another bug, or list my feature in the metabug - seeming an ongoing theme with metabugs - Do I add the bug, reply to the email or both -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/6] spirv/radv: add AMD_gcn_shader capability, remove current extensions
On 14/03/18 16:08, Daniel Schürmann wrote: > > On 14.03.2018 16:03, Alejandro Piñeiro wrote: >> On 14/03/18 15:55, Daniel Schürmann wrote: >>> Not sure, if I'm asked here :) >>> As AMD_gcn_shader seems to be the only extension without new >>> capability, >>> I am fine with just handling it as if. >> Well, I was exactly asking this, if everybody involved is fine with >> this. Bonus points to get a review to this patch. >> >>> Additionally, we might want to rename it to gcn_shader to be consistent >>> (or add the vendor names to all capabilities). >> Makes sense. >> >>> Do you want to introduce one field per capability or have some >>> capabilities merged (like now)? >> Which capabilities are merged? > storage_16bit: SpvCapabilityStorageUniformBufferBlock16, > SpvCapabilityStorageUniform16, SpvCapabilityStoragePushConstant16, > SpvCapabilityStorageInputOutput16 > variable_pointers: SpvCapabilityVariablePointersStorageBuffer, > SpvCapabilityVariablePointers > subgroup_arithmetic: SpvCapabilityGroupNonUniformArithmetic, > SpvCapabilityGroupNonUniformClustered > subgroup_shuffle: SpvCapabilityGroupNonUniformShuffle, > SpvCapabilityGroupNonUniformShuffleRelative > tessellation: SpvCapabilityTessellation, > SpvCapabilityTessellationPointSize Oh true. Thanks for the detailed list. So now replying to your question: I think that it would be better to keep capabilities merged. Mostly because it is working right now, and I don't see any big advantage to start to split it, unless we want start to fine-grain spirv_to_nir support for each capability defined at each extension, and that seems a little overkill. >>> >>> On 11.03.2018 16:25, Alejandro Piñeiro wrote: FWIW, this is the patch that Im more interested to get a review. It is also the one that probably would need some discussion. Fortunately this one can be reviewed independently of the rest of the patches, so the others can wait a little. Getting this into would make the rebase of this series more easy. 
So: ping (please) On 08/03/18 16:00, Alejandro Piñeiro wrote: > So now, during spirv_to_nir, it uses the capability instead of the > extension. Note that we are really doing here is treating > SPV_AMD_gcn_shader as other supported extensions. SPV_AMD_gcn_shader > is not the first SPV extension supported. For example, the capability > draw_parameters infers if the extension > SPV_KHR_shader_draw_parameters > is supported or not. > > This could be seen as counter-intuitive, and that it would be easier > to define which extensions are supported, and based our checks on > that, but we need to take into account that some capabilities are > optional from core, and others came from new extensions. > > Also this commit would make the implementation of > ARB_spirv_extensions > easier. > --- > > Note that I'm aware that this can be somewhat confusing at first. But > most of the SPV extensions defines a new capability, so it makes > sense > to add one, and compute the other based on that. As I mention on a > different patch on this series, it was easier to compute extensions > from capabilities, instead of the other way around, because core > SPIR-V defines optional capabilities without the need of an > extension. > > Having said so, I have read the SPV_AMD_gcn_shader, and it doesn't > define a new capability (the first one I see that doesn't do > that), so > I'm somewhat forcing that here. 
> > > src/amd/vulkan/radv_shader.c | 2 -- > src/compiler/shader_info.h | 4 > src/compiler/spirv/nir_spirv.h | 1 - > src/compiler/spirv/spirv_to_nir.c | 2 +- > 4 files changed, 1 insertion(+), 8 deletions(-) > > diff --git a/src/amd/vulkan/radv_shader.c > b/src/amd/vulkan/radv_shader.c > index 85672e600d7..46017290654 100644 > --- a/src/amd/vulkan/radv_shader.c > +++ b/src/amd/vulkan/radv_shader.c > @@ -214,8 +214,6 @@ radv_shader_compile_to_nir(struct radv_device > *device, > .multiview = true, > .subgroup_basic = true, > .variable_pointers = true, > - }, > - .exts = { > .AMD_gcn_shader = true, > }, > }; > diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h > index b1e200070f7..502b7901370 100644 > --- a/src/compiler/shader_info.h > +++ b/src/compiler/shader_info.h > @@ -51,10 +51,6 @@ struct spirv_supported_capabilities { > bool subgroup_quad; > bool subgroup_shuffle; > bool subgroup_vote; > -}; > - > -/* The supported extensions which add extended instructions */ > -struct spirv_supported_extensions { > bool AMD_gcn_shader; > }; > diff --git a/src/compiler/sp
Re: [Mesa-dev] soft fp64 support - main body (glsl/gallium)
On Tue, Mar 13, 2018 at 04:54:27PM -0700, Matt Turner wrote: > On Mon, Mar 12, 2018 at 9:24 PM, Dave Airlie wrote: > > This is the main code for the soft fp64 work. It's mostly Elie's > > code with a bunch of changes by me. > > > > This patchset has all the glsl lowering code. (using float64.glsl, > > yes I know checked in files are bad, but not bad enough for anyone > > to have solved int64.glsl yet, so we have a precedent). > Hi Matt > Have you thought about making a NIR backend for R600? > > Elie sent patches for lowering fp64 operations in NIR, and it's what > I'm going to start from when I do the analogous project for some > future Intel hardware. It's sad to duplicate all of this code, much > less all of this effort. Let me know when you start this project, I will be happy to help. Some (bad) idea here. Can we wire up the GLSL IR version for your hardware? Once the NIR version is finished, we can just remove it. > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir/vtn: fix OpConvertXToY
Samuel sent out a patch to fix this already which I've reviewed. Feel free to merge it. I gave him comments on the second so maybe best to leave that one be. That said, I do sort-of like your macro... On Wed, Mar 14, 2018 at 8:08 AM, Rob Clark wrote: > These opcodes don't care about src/dst type, only src/dst size. > > Signed-off-by: Rob Clark > --- > src/compiler/spirv/vtn_alu.c | 14 ++ > 1 file changed, 10 insertions(+), 4 deletions(-) > > diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c > index f0b69b38f83..1ca1f951200 100644 > --- a/src/compiler/spirv/vtn_alu.c > +++ b/src/compiler/spirv/vtn_alu.c > @@ -349,11 +349,17 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder > *b, > > /* Conversions: */ > case SpvOpQuantizeToF16: return nir_op_fquantize2f16; > + > +#define nir_type_cast(type, basetype) (nir_alu_type_get_type_size(type) > | nir_type_ ## basetype) > +#define typed_conversion_op(src, srctype, dst, dsttype) \ > + nir_type_conversion_op(nir_type_cast(src, srctype), > nir_type_cast(dst, dsttype), nir_rounding_mode_undef) > + > + case SpvOpConvertFToU: return typed_conversion_op(src, float, dst, > uint); > + case SpvOpConvertFToS: return typed_conversion_op(src, float, dst, > int); > + case SpvOpConvertSToF: return typed_conversion_op(src, int, dst, > float); > + case SpvOpConvertUToF: return typed_conversion_op(src, uint, dst, > float); > + > case SpvOpUConvert: > - case SpvOpConvertFToU: > - case SpvOpConvertFToS: > - case SpvOpConvertSToF: > - case SpvOpConvertUToF: > case SpvOpSConvert: > case SpvOpFConvert: >return nir_type_conversion_op(src, dst, nir_rounding_mode_undef); > -- > 2.14.3 > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nir/vtn: fix OpConvertXToY
These opcodes don't care about src/dst type, only src/dst size. Signed-off-by: Rob Clark --- src/compiler/spirv/vtn_alu.c | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c index f0b69b38f83..1ca1f951200 100644 --- a/src/compiler/spirv/vtn_alu.c +++ b/src/compiler/spirv/vtn_alu.c @@ -349,11 +349,17 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b, /* Conversions: */ case SpvOpQuantizeToF16: return nir_op_fquantize2f16; + +#define nir_type_cast(type, basetype) (nir_alu_type_get_type_size(type) | nir_type_ ## basetype) +#define typed_conversion_op(src, srctype, dst, dsttype) \ + nir_type_conversion_op(nir_type_cast(src, srctype), nir_type_cast(dst, dsttype), nir_rounding_mode_undef) + + case SpvOpConvertFToU: return typed_conversion_op(src, float, dst, uint); + case SpvOpConvertFToS: return typed_conversion_op(src, float, dst, int); + case SpvOpConvertSToF: return typed_conversion_op(src, int, dst, float); + case SpvOpConvertUToF: return typed_conversion_op(src, uint, dst, float); + case SpvOpUConvert: - case SpvOpConvertFToU: - case SpvOpConvertFToS: - case SpvOpConvertSToF: - case SpvOpConvertUToF: case SpvOpSConvert: case SpvOpFConvert: return nir_type_conversion_op(src, dst, nir_rounding_mode_undef); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/6] spirv/radv: add AMD_gcn_shader capability, remove current extensions
On 14.03.2018 16:03, Alejandro Piñeiro wrote: On 14/03/18 15:55, Daniel Schürmann wrote: Not sure, if I'm asked here :) As AMD_gcn_shader seems to be the only extension without new capability, I am fine with just handling it as if. Well, I was exactly asking this, if everybody involved is fine with this. Bonus points to get a review to this patch. Additionally, we might want to rename it to gcn_shader to be consistent (or add the vendor names to all capabilities). Makes sense. Do you want to introduce one field per capability or have some capabilities merged (like now)? Which capabilities are merged? storage_16bit: SpvCapabilityStorageUniformBufferBlock16, SpvCapabilityStorageUniform16, SpvCapabilityStoragePushConstant16, SpvCapabilityStorageInputOutput16 variable_pointers: SpvCapabilityVariablePointersStorageBuffer, SpvCapabilityVariablePointers subgroup_arithmetic: SpvCapabilityGroupNonUniformArithmetic, SpvCapabilityGroupNonUniformClustered subgroup_shuffle: SpvCapabilityGroupNonUniformShuffle, SpvCapabilityGroupNonUniformShuffleRelative tessellation: SpvCapabilityTessellation, SpvCapabilityTessellationPointSize On 11.03.2018 16:25, Alejandro Piñeiro wrote: FWIW, this is the patch that Im more interested to get a review. It is also the one that probably would need some discussion. Fortunately this one can be reviewed independently of the rest of the patches, so the others can wait a little. Getting this into would make the rebase of this series more easy. So: ping (please) On 08/03/18 16:00, Alejandro Piñeiro wrote: So now, during spirv_to_nir, it uses the capability instead of the extension. Note that we are really doing here is treating SPV_AMD_gcn_shader as other supported extensions. SPV_AMD_gcn_shader is not the first SPV extension supported. For example, the capability draw_parameters infers if the extension SPV_KHR_shader_draw_parameters is supported or not. 
This could be seen as counter-intuitive, and that it would be easier to define which extensions are supported, and based our checks on that, but we need to take into account that some capabilities are optional from core, and others came from new extensions. Also this commit would make the implementation of ARB_spirv_extensions easier. --- Note that I'm aware that this can be somewhat confusing at first. But most of the SPV extensions defines a new capability, so it makes sense to add one, and compute the other based on that. As I mention on a different patch on this series, it was easier to compute extensions from capabilities, instead of the other way around, because core SPIR-V defines optional capabilities without the need of an extension. Having said so, I have read the SPV_AMD_gcn_shader, and it doesn't define a new capability (the first one I see that doesn't do that), so I'm somewhat forcing that here. src/amd/vulkan/radv_shader.c | 2 -- src/compiler/shader_info.h | 4 src/compiler/spirv/nir_spirv.h | 1 - src/compiler/spirv/spirv_to_nir.c | 2 +- 4 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 85672e600d7..46017290654 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -214,8 +214,6 @@ radv_shader_compile_to_nir(struct radv_device *device, .multiview = true, .subgroup_basic = true, .variable_pointers = true, - }, - .exts = { .AMD_gcn_shader = true, }, }; diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h index b1e200070f7..502b7901370 100644 --- a/src/compiler/shader_info.h +++ b/src/compiler/shader_info.h @@ -51,10 +51,6 @@ struct spirv_supported_capabilities { bool subgroup_quad; bool subgroup_shuffle; bool subgroup_vote; -}; - -/* The supported extensions which add extended instructions */ -struct spirv_supported_extensions { bool AMD_gcn_shader; }; diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h index 
87d4120c380..d2766abb7f9 100644 --- a/src/compiler/spirv/nir_spirv.h +++ b/src/compiler/spirv/nir_spirv.h @@ -60,7 +60,6 @@ struct spirv_to_nir_options { bool lower_workgroup_access_to_offsets; struct spirv_supported_capabilities caps; - struct spirv_supported_extensions exts; struct { void (*func)(void *private_data, diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 66b87c049bb..6aa4a4d6b6f 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -374,7 +374,7 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { val->ext_handler = vtn_handle_glsl450_instruction; } else if ((strcmp((const char *)&w[2], "SPV_AMD_gcn_shader") == 0) - && (b->options && b->options
Re: [Mesa-dev] [PATCH 2/6] spirv/radv: add AMD_gcn_shader capability, remove current extensions
On 14/03/18 15:55, Daniel Schürmann wrote: > Not sure, if I'm asked here :) > As AMD_gcn_shader seems to be the only extension without new capability, > I am fine with just handling it as if. Well, I was exactly asking this, if everybody involved is fine with this. Bonus points to get a review to this patch. > > Additionally, we might want to rename it to gcn_shader to be consistent > (or add the vendor names to all capabilities). Makes sense. > > Do you want to introduce one field per capability or have some > capabilities merged (like now)? Which capabilities are merged? > > > On 11.03.2018 16:25, Alejandro Piñeiro wrote: >> FWIW, this is the patch that Im more interested to get a review. It is >> also the one that probably would need some discussion. Fortunately this >> one can be reviewed independently of the rest of the patches, so the >> others can wait a little. Getting this into would make the rebase of >> this series more easy. >> >> So: ping (please) >> >> >> On 08/03/18 16:00, Alejandro Piñeiro wrote: >>> So now, during spirv_to_nir, it uses the capability instead of the >>> extension. Note that we are really doing here is treating >>> SPV_AMD_gcn_shader as other supported extensions. SPV_AMD_gcn_shader >>> is not the first SPV extension supported. For example, the capability >>> draw_parameters infers if the extension SPV_KHR_shader_draw_parameters >>> is supported or not. >>> >>> This could be seen as counter-intuitive, and that it would be easier >>> to define which extensions are supported, and based our checks on >>> that, but we need to take into account that some capabilities are >>> optional from core, and others came from new extensions. >>> >>> Also this commit would make the implementation of ARB_spirv_extensions >>> easier. >>> --- >>> >>> Note that I'm aware that this can be somewhat confusing at first. But >>> most of the SPV extensions defines a new capability, so it makes sense >>> to add one, and compute the other based on that. 
As I mention on a >>> different patch on this series, it was easier to compute extensions >>> from capabilities, instead of the other way around, because core >>> SPIR-V defines optional capabilities without the need of an extension. >>> >>> Having said so, I have read the SPV_AMD_gcn_shader, and it doesn't >>> define a new capability (the first one I see that doesn't do that), so >>> I'm somewhat forcing that here. >>> >>> >>> src/amd/vulkan/radv_shader.c | 2 -- >>> src/compiler/shader_info.h | 4 >>> src/compiler/spirv/nir_spirv.h | 1 - >>> src/compiler/spirv/spirv_to_nir.c | 2 +- >>> 4 files changed, 1 insertion(+), 8 deletions(-) >>> >>> diff --git a/src/amd/vulkan/radv_shader.c >>> b/src/amd/vulkan/radv_shader.c >>> index 85672e600d7..46017290654 100644 >>> --- a/src/amd/vulkan/radv_shader.c >>> +++ b/src/amd/vulkan/radv_shader.c >>> @@ -214,8 +214,6 @@ radv_shader_compile_to_nir(struct radv_device >>> *device, >>> .multiview = true, >>> .subgroup_basic = true, >>> .variable_pointers = true, >>> - }, >>> - .exts = { >>> .AMD_gcn_shader = true, >>> }, >>> }; >>> diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h >>> index b1e200070f7..502b7901370 100644 >>> --- a/src/compiler/shader_info.h >>> +++ b/src/compiler/shader_info.h >>> @@ -51,10 +51,6 @@ struct spirv_supported_capabilities { >>> bool subgroup_quad; >>> bool subgroup_shuffle; >>> bool subgroup_vote; >>> -}; >>> - >>> -/* The supported extensions which add extended instructions */ >>> -struct spirv_supported_extensions { >>> bool AMD_gcn_shader; >>> }; >>> diff --git a/src/compiler/spirv/nir_spirv.h >>> b/src/compiler/spirv/nir_spirv.h >>> index 87d4120c380..d2766abb7f9 100644 >>> --- a/src/compiler/spirv/nir_spirv.h >>> +++ b/src/compiler/spirv/nir_spirv.h >>> @@ -60,7 +60,6 @@ struct spirv_to_nir_options { >>> bool lower_workgroup_access_to_offsets; >>> struct spirv_supported_capabilities caps; >>> - struct spirv_supported_extensions exts; >>> struct { >>> void (*func)(void 
*private_data, >>> diff --git a/src/compiler/spirv/spirv_to_nir.c >>> b/src/compiler/spirv/spirv_to_nir.c >>> index 66b87c049bb..6aa4a4d6b6f 100644 >>> --- a/src/compiler/spirv/spirv_to_nir.c >>> +++ b/src/compiler/spirv/spirv_to_nir.c >>> @@ -374,7 +374,7 @@ vtn_handle_extension(struct vtn_builder *b, >>> SpvOp opcode, >>> if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { >>> val->ext_handler = vtn_handle_glsl450_instruction; >>> } else if ((strcmp((const char *)&w[2], >>> "SPV_AMD_gcn_shader") == 0) >>> - && (b->options && b->options->exts.AMD_gcn_shader)) { >>> + && (b->options && b->options->caps.AMD_gcn_shader)) { >>> val->ext_handler = vtn_handle_amd_gcn_shader_instruction; >>> } else { >>> vtn_fail("Un
[Mesa-dev] [Bug 105506] Vulkan MSAA is broken on SI
https://bugs.freedesktop.org/show_bug.cgi?id=105506 --- Comment #3 from Turo Lamminen --- Created attachment 138108 --> https://bugs.freedesktop.org/attachment.cgi?id=138108&action=edit vktrace trace -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [vulkan][intel] SIGBUS, Bus error during command buffer recording
Hi! When recording a command buffer I get the following error: Program received signal SIGBUS, Bus error. anv_state_stream_alloc (stream=stream@entry=0x9dbf9dd8, size=64, alignment=alignment@entry=32) at vulkan/anv_allocator.c:913 913 VG_NOACCESS_WRITE(&sb->block, stream->block); 0 in anv_state_stream_alloc of vulkan/anv_allocator.c:913 1 in anv_cmd_buffer_alloc_dynamic_state of vulkan/anv_batch_chain.c:654 2 in anv_cmd_buffer_push_constants of vulkan/anv_cmd_buffer.c:729 3 in cmd_buffer_flush_push_constants of vulkan/genX_cmd_buffer.c:2420 4 in gen9_cmd_buffer_flush_state of vulkan/genX_cmd_buffer.c:2571 5 in gen9_CmdDrawIndexed of vulkan/genX_cmd_buffer.c:2709 6 in ?? of /usr/lib/libVkLayer_core_validation.so 7 in ?? of /usr/lib/libVkLayer_parameter_validation.so 8 in ?? of /usr/lib/libVkLayer_threading.so 9 in vkcmd_create_secondary_command_buffer of vkcmd.c:207 10 in vkcmd_create_secondary_command_buffer_for_inst of vkcmd.c:88 11 in scn_load_scene of scene.c:407 12 in create_scene of main.c:903 13 in main of main.c:583 I enabled validation layers and everything is fine (no output). This happens for push constants. I use a size of 80 bytes. I have 128 bytes available on my system. I send the same range for both stages. The function that records the buffer: https://pastebin.com/vN2WjA1W I use Intel Corporation HD Graphics 630. I increased dedicated memory to 1024MB and nothing changed. I also tried to reduce the push constant size to 64 (send only the matrix) and it did not help. What could be the reason? Thanks, Vyacheslav ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC] Mesa 17.3.x release problems and process improvements
On Mon, 2018-03-12 at 15:48 +, Emil Velikov wrote: > On 12 March 2018 at 14:20, Andres Gomez wrote: [...] > > On Tue, 2018-03-06 at 19:34 +, Emil Velikov wrote: > > > > [...] > > > > > A few other ideas that also came to mind: > > > > > > - Round robin - where me/Igalia team will check for outstanding > > >patches, backports, etc. > > > > I'm open to this. So far Juan and I have been doing this task while > > being on release duty but maybe it is better to explicitly agree among us > > (on a specific policy/shift rotation). > > > > If there's an agreement to have a per-team maintainer, this won't > be needed... I think. > > In the meanwhile, do share how you envision this? Maybe I'm not understanding your proposal and you have something else in mind but, as I see it, during the 2 weeks before a bugfix release happens, this is what I was doing at the beginning of my working day: * Check the new landed patches. Identify the ones tagged for the stable branch and cross check them with the threads in the -stable ML. * Apply the nominated patches and let Travis-CI check they were not breaking the stable queue. * If any nominated patch was breaking Travis-CI or not applying into the stable queue (with a trivial conflict resolution), ping the author to ask for a backport, or clarification. * From the list of landed patches, identify non nominated ones that look like they should get into the stable branch. I did this in a looser, more relaxed way. * Check in the -stable ML for stagnated threads and poke the authors, if needed. I did this more often when getting closer to the release date. * Nightly we (Igalia) have our own custom automation to run piglit and VK-GL-CTS with i965 and the software drivers in search of regressions in the stable queue.
> > > - Have two distinct emails - an announcement and a second RFC that > > >lists the rejected patches and ones with outstanding backports > > > > I don't think this would be really necessary, especially if we adopt > > GitLab. > > > > The idea is what to do, until we adopt it or any other solution. Would > the split help people? To be honest, as long as we keep a review system based on a mailing list, I think the -stable one suffices, without needing a new one. I'm not opposed, though. -- Br, Andres ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/6] spirv/radv: add AMD_gcn_shader capability, remove current extensions
Not sure, if I'm asked here :) As AMD_gcn_shader seems to be the only extension without new capability, I am fine with just handling it as if. Additionally, we might want to rename it to gcn_shader to be consistent (or add the vendor names to all capabilities). Do you want to introduce one field per capability or have some capabilities merged (like now)? On 11.03.2018 16:25, Alejandro Piñeiro wrote: FWIW, this is the patch that Im more interested to get a review. It is also the one that probably would need some discussion. Fortunately this one can be reviewed independently of the rest of the patches, so the others can wait a little. Getting this into would make the rebase of this series more easy. So: ping (please) On 08/03/18 16:00, Alejandro Piñeiro wrote: So now, during spirv_to_nir, it uses the capability instead of the extension. Note that we are really doing here is treating SPV_AMD_gcn_shader as other supported extensions. SPV_AMD_gcn_shader is not the first SPV extension supported. For example, the capability draw_parameters infers if the extension SPV_KHR_shader_draw_parameters is supported or not. This could be seen as counter-intuitive, and that it would be easier to define which extensions are supported, and based our checks on that, but we need to take into account that some capabilities are optional from core, and others came from new extensions. Also this commit would make the implementation of ARB_spirv_extensions easier. --- Note that I'm aware that this can be somewhat confusing at first. But most of the SPV extensions defines a new capability, so it makes sense to add one, and compute the other based on that. As I mention on a different patch on this series, it was easier to compute extensions from capabilities, instead of the other way around, because core SPIR-V defines optional capabilities without the need of an extension. 
Having said so, I have read the SPV_AMD_gcn_shader, and it doesn't define a new capability (the first one I see that doesn't do that), so I'm somewhat forcing that here. src/amd/vulkan/radv_shader.c | 2 -- src/compiler/shader_info.h| 4 src/compiler/spirv/nir_spirv.h| 1 - src/compiler/spirv/spirv_to_nir.c | 2 +- 4 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 85672e600d7..46017290654 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -214,8 +214,6 @@ radv_shader_compile_to_nir(struct radv_device *device, .multiview = true, .subgroup_basic = true, .variable_pointers = true, - }, - .exts = { .AMD_gcn_shader = true, }, }; diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h index b1e200070f7..502b7901370 100644 --- a/src/compiler/shader_info.h +++ b/src/compiler/shader_info.h @@ -51,10 +51,6 @@ struct spirv_supported_capabilities { bool subgroup_quad; bool subgroup_shuffle; bool subgroup_vote; -}; - -/* The supported extensions which add extended instructions */ -struct spirv_supported_extensions { bool AMD_gcn_shader; }; diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h index 87d4120c380..d2766abb7f9 100644 --- a/src/compiler/spirv/nir_spirv.h +++ b/src/compiler/spirv/nir_spirv.h @@ -60,7 +60,6 @@ struct spirv_to_nir_options { bool lower_workgroup_access_to_offsets; struct spirv_supported_capabilities caps; - struct spirv_supported_extensions exts; struct { void (*func)(void *private_data, diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 66b87c049bb..6aa4a4d6b6f 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -374,7 +374,7 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { val->ext_handler = vtn_handle_glsl450_instruction; } else if ((strcmp((const char *)&w[2], 
"SPV_AMD_gcn_shader") == 0) -&& (b->options && b->options->exts.AMD_gcn_shader)) { +&& (b->options && b->options->caps.AMD_gcn_shader)) { val->ext_handler = vtn_handle_amd_gcn_shader_instruction; } else { vtn_fail("Unsupported extension"); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105507] Crash when destroying a newly resized EGLsurface with wayland egl (dri2)
https://bugs.freedesktop.org/show_bug.cgi?id=105507 Bug ID: 105507 Summary: Crash when destroying a newly resized EGLsurface with wayland egl (dri2) Product: Mesa Version: 17.3 Hardware: x86-64 (AMD64) OS: Linux (All) Status: NEW Severity: normal Priority: medium Component: EGL/Wayland Assignee: wayland-b...@lists.freedesktop.org Reporter: johan.hels...@qt.io QA Contact: mesa-dev@lists.freedesktop.org In dri2_wl_surface_release_buffers, a wl_buffer is not destroyed if it's locked. Afterwards it's set to null regardless (dri2_surf->color_buffers[i].wl_buffer = NULL;) Normally, this is fine, since the buffer will be released by the wl_buffer_release event when the compositor is done with it. But if the EGLSurface is destroyed first, then the event queue for the surface (and for the wl_buffer) is destroyed, and the wl_buffer_release event then causes a crash because we try to use a destroyed event queue. One solution would be to maintain a separate list of buffers we tried to destroy, but couldn't because they were locked, and make sure they are destroyed in dri2_wl_destroy_surface. This might not be a problem users frequently run into, but it's causing many unit tests in Qt to be flaky, and we probably have to blacklist them until this is fixed (https://bugreports.qt.io/browse/QTBUG-66848) -- You are receiving this mail because: You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] spirv: update arguments for vtn_nir_alu_op_for_spirv_opcode()
On March 14, 2018 03:42:04 Samuel Iglesias Gonsálvez wrote: We don't need anymore the source and destination's data type, just their bitsize. Signed-off-by: Samuel Iglesias Gonsálvez --- src/compiler/spirv/spirv_to_nir.c | 4 ++-- src/compiler/spirv/vtn_alu.c | 30 ++ src/compiler/spirv/vtn_private.h | 2 +- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 3de45c47371..516fce1ecec 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1703,8 +1703,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, }; nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, - src_alu_type, - dst_alu_type); + nir_alu_type_get_type_size(src_alu_type), + nir_alu_type_get_type_size(dst_alu_type)); nir_const_value src[4]; for (unsigned i = 0; i < count - 4; i++) { diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c index 15158b39520..3720a9484ff 100644 --- a/src/compiler/spirv/vtn_alu.c +++ b/src/compiler/spirv/vtn_alu.c @@ -275,7 +275,7 @@ vtn_handle_bitcast(struct vtn_builder *b, struct vtn_ssa_value *dest, nir_op vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b, SpvOp opcode, bool *swap, -nir_alu_type src, nir_alu_type dst) +unsigned src_bit_size, unsigned dst_bit_size) { /* Indicates that the first two arguments should be swapped. This is * used for implementing greater-than and less-than-or-equal. 
@@ -388,8 +388,8 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b, default: unreachable("Invalid opcode"); } - src_type |= nir_alu_type_get_type_size(src); - dst_type |= nir_alu_type_get_type_size(dst); + src_type |= src_bit_size; + dst_type |= dst_bit_size; return nir_type_conversion_op(src_type, dst_type, nir_rounding_mode_undef); } /* Derivatives: */ @@ -575,10 +575,12 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpFUnordLessThanEqual: case SpvOpFUnordGreaterThanEqual: { bool swap; - nir_alu_type src_alu_type = nir_get_nir_type_for_glsl_type(vtn_src[0]->type); - nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type); + unsigned src_bit_size = + nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(vtn_src[0]->type)); + unsigned dst_bit_size = + nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(type)); Just use glsl_get_bit_size nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, - src_alu_type, dst_alu_type); + src_bit_size, dst_bit_size); if (swap) { nir_ssa_def *tmp = src[0]; @@ -602,10 +604,12 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpFOrdLessThanEqual: case SpvOpFOrdGreaterThanEqual: { bool swap; - nir_alu_type src_alu_type = nir_get_nir_type_for_glsl_type(vtn_src[0]->type); - nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type); + unsigned src_bit_size = + nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(vtn_src[0]->type)); + unsigned dst_bit_size = + nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(type)); Same here nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, - src_alu_type, dst_alu_type); + src_bit_size, dst_bit_size); if (swap) { nir_ssa_def *tmp = src[0]; @@ -640,10 +644,12 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, default: { bool swap; - nir_alu_type src_alu_type = nir_get_nir_type_for_glsl_type(vtn_src[0]->type); - nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type); + unsigned src_bit_size = + 
nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(vtn_src[0]->type)); + unsigned dst_bit_size = + nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(type)); And here With those changes made, rb. nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, - src_alu_type, dst_alu_type); + src_bit_size, dst_bit_size); if (swap) { nir_ssa_def *tmp = src[0]; diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h index a8fa612384f..70f660fbd48 100644 --- a/src/compiler/
[Mesa-dev] [PATCH 3/3] RFC: nir+vtn: vec8+vec16 support
This introduces new vec8 and vec16 instructions (which are the only instructions taking more than 4 sources), in order to construct 8 and 16 component vectors. nir_build_alu() is a bit ugly.. perhaps re-work to take an array of src's? Current approach should be something the compiler could inline and optimize reasonably well, but I guess it could do equally well with an array of sources instead? I possibly missed some spots. But probably the best way to track those down is get farther through OpenCL CTS. I don't expect this should cause any issues with vulkan or gl, and I guess it should be fine to fix cl issues as we go. --- src/compiler/glsl/glsl_to_nir.cpp | 5 ++- src/compiler/nir/nir.h| 30 +++--- src/compiler/nir/nir_builder.h| 49 --- src/compiler/nir/nir_builder_opcodes_h.py | 2 +- src/compiler/nir/nir_constant_expressions.py | 33 +-- src/compiler/nir/nir_lower_alu_to_scalar.c| 13 -- src/compiler/nir/nir_lower_io_to_scalar.c | 4 +- src/compiler/nir/nir_lower_load_const_to_scalar.c | 2 +- src/compiler/nir/nir_opcodes.py | 39 +- src/compiler/nir/nir_print.c | 19 ++--- src/compiler/nir/nir_validate.c | 4 +- src/compiler/spirv/spirv_to_nir.c | 6 ++- src/compiler/spirv/vtn_alu.c | 32 +++ src/compiler/spirv/vtn_glsl450.c | 5 ++- 14 files changed, 190 insertions(+), 53 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 80eb15f1ab1..c6c7b094794 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1588,7 +1588,10 @@ nir_visitor::visit(ir_expression *ir) nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type); result = nir_build_alu(&b, nir_type_conversion_op(src_type, dst_type, nir_rounding_mode_undef), - srcs[0], NULL, NULL, NULL); + srcs[0], NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL); /* b2i and b2f don't have fixed bit-size versions so the builder will * just assume 32 and we have to fix it up here. 
*/ diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 6a51b7c4ab1..8e5b3a493e0 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -118,16 +118,16 @@ typedef enum { } nir_rounding_mode; typedef union { - float f32[4]; - double f64[4]; - int8_t i8[4]; - uint8_t u8[4]; - int16_t i16[4]; - uint16_t u16[4]; - int32_t i32[4]; - uint32_t u32[4]; - int64_t i64[4]; - uint64_t u64[4]; + float f32[16]; + double f64[16]; + int8_t i8[16]; + uint8_t u8[16]; + int16_t i16[16]; + uint16_t u16[16]; + int32_t i32[16]; + uint32_t u32[16]; + int64_t i64[16]; + uint64_t u64[16]; } nir_const_value; typedef struct nir_constant { @@ -138,7 +138,7 @@ typedef struct nir_constant { * by the type associated with the \c nir_variable. Constants may be * scalars, vectors, or matrices. */ - nir_const_value values[4]; + nir_const_value values[16]; /* we could get this from the var->type but makes clone *much* easier to * not have to care about the type. @@ -663,7 +663,7 @@ typedef struct { * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and * a swizzle of {2, x, 1, 0} where x means "don't care." */ - uint8_t swizzle[4]; + uint8_t swizzle[16]; } nir_alu_src; typedef struct { @@ -678,7 +678,7 @@ typedef struct { bool saturate; - unsigned write_mask : 4; /* ignored if dest.is_ssa is true */ + unsigned write_mask : 16; /* ignored if dest.is_ssa is true */ } nir_alu_dest; typedef enum { @@ -807,14 +807,14 @@ typedef struct { /** * The number of components in each input */ - unsigned input_sizes[4]; + unsigned input_sizes[16]; /** * The type of vector that each input takes. Note that negate and * absolute value are only allowed on inputs with int or float type and * behave differently on the two. 
*/ - nir_alu_type input_types[4]; + nir_alu_type input_types[16]; nir_op_algebraic_property algebraic_properties; } nir_op_info; diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 36e0ae3ac63..9d1974f7a02 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -291,7 +291,11 @@ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w) static inline nir_ssa_def * nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, - nir_ssa_def *src1, nir_ssa_def *src2, nir_ss
[Mesa-dev] [PATCH 1/3] glsl_types: refactor/prep for vec8/vec16
Refactor things so there isn't so much typing involved to add new things. Also drops a pointless conditional (out of bounds rows or columns already returns error_type in all paths.. might as well drop it rather than make the check more convoluted in the next patch by adding the vec8/vec16 case). Signed-off-by: Rob Clark --- src/compiler/builtin_type_macros.h | 77 ++-- src/compiler/glsl_types.cpp| 120 ++--- src/compiler/glsl_types.h | 1 + 3 files changed, 49 insertions(+), 149 deletions(-) diff --git a/src/compiler/builtin_type_macros.h b/src/compiler/builtin_type_macros.h index 807691824d3..dd8204a1981 100644 --- a/src/compiler/builtin_type_macros.h +++ b/src/compiler/builtin_type_macros.h @@ -31,25 +31,24 @@ DECL_TYPE(error, GL_INVALID_ENUM, GLSL_TYPE_ERROR, 0, 0) DECL_TYPE(void, GL_INVALID_ENUM, GLSL_TYPE_VOID, 0, 0) -DECL_TYPE(bool, GL_BOOL, GLSL_TYPE_BOOL, 1, 1) -DECL_TYPE(bvec2, GL_BOOL_VEC2,GLSL_TYPE_BOOL, 2, 1) -DECL_TYPE(bvec3, GL_BOOL_VEC3,GLSL_TYPE_BOOL, 3, 1) -DECL_TYPE(bvec4, GL_BOOL_VEC4,GLSL_TYPE_BOOL, 4, 1) - -DECL_TYPE(int,GL_INT, GLSL_TYPE_INT, 1, 1) -DECL_TYPE(ivec2, GL_INT_VEC2, GLSL_TYPE_INT, 2, 1) -DECL_TYPE(ivec3, GL_INT_VEC3, GLSL_TYPE_INT, 3, 1) -DECL_TYPE(ivec4, GL_INT_VEC4, GLSL_TYPE_INT, 4, 1) - -DECL_TYPE(uint, GL_UNSIGNED_INT, GLSL_TYPE_UINT, 1, 1) -DECL_TYPE(uvec2, GL_UNSIGNED_INT_VEC2, GLSL_TYPE_UINT, 2, 1) -DECL_TYPE(uvec3, GL_UNSIGNED_INT_VEC3, GLSL_TYPE_UINT, 3, 1) -DECL_TYPE(uvec4, GL_UNSIGNED_INT_VEC4, GLSL_TYPE_UINT, 4, 1) - -DECL_TYPE(float, GL_FLOAT,GLSL_TYPE_FLOAT, 1, 1) -DECL_TYPE(vec2, GL_FLOAT_VEC2, GLSL_TYPE_FLOAT, 2, 1) -DECL_TYPE(vec3, GL_FLOAT_VEC3, GLSL_TYPE_FLOAT, 3, 1) -DECL_TYPE(vec4, GL_FLOAT_VEC4, GLSL_TYPE_FLOAT, 4, 1) +#define DECL_VEC_TYPE(stype, vtype, btype, etype, ...) 
\ + DECL_TYPE(stype, etype ##__VA_ARGS__, btype, 1, 1) \ + DECL_TYPE(vtype ## 2, etype ##_VEC2 ##__VA_ARGS__, btype, 2, 1) \ + DECL_TYPE(vtype ## 3, etype ##_VEC3 ##__VA_ARGS__, btype, 3, 1) \ + DECL_TYPE(vtype ## 4, etype ##_VEC4 ##__VA_ARGS__, btype, 4, 1) + +DECL_VEC_TYPE(bool, bvec, GLSL_TYPE_BOOL,GL_BOOL) +DECL_VEC_TYPE(int, ivec, GLSL_TYPE_INT, GL_INT) +DECL_VEC_TYPE(uint, uvec, GLSL_TYPE_UINT,GL_UNSIGNED_INT) +DECL_VEC_TYPE(float, vec,GLSL_TYPE_FLOAT, GL_FLOAT) +DECL_VEC_TYPE(float16_t, f16vec, GLSL_TYPE_FLOAT16, GL_FLOAT16, _NV) +DECL_VEC_TYPE(double,dvec, GLSL_TYPE_DOUBLE, GL_DOUBLE) +DECL_VEC_TYPE(int64_t, i64vec, GLSL_TYPE_INT64, GL_INT64, _ARB) +DECL_VEC_TYPE(uint64_t, u64vec, GLSL_TYPE_UINT64, GL_UNSIGNED_INT64, _ARB) +DECL_VEC_TYPE(int16_t, i16vec, GLSL_TYPE_INT16, GL_INT16, _NV) +DECL_VEC_TYPE(uint16_t, u16vec, GLSL_TYPE_UINT16, GL_UNSIGNED_INT16, _NV) +DECL_VEC_TYPE(int8_t,i8vec, GLSL_TYPE_INT8,GL_INT8, _NV) +DECL_VEC_TYPE(uint8_t, u8vec, GLSL_TYPE_UINT8, GL_UNSIGNED_INT8, _NV) DECL_TYPE(mat2, GL_FLOAT_MAT2, GLSL_TYPE_FLOAT, 2, 2) DECL_TYPE(mat3, GL_FLOAT_MAT3, GLSL_TYPE_FLOAT, 3, 3) @@ -62,11 +61,6 @@ DECL_TYPE(mat3x4, GL_FLOAT_MAT3x4, GLSL_TYPE_FLOAT, 4, 3) DECL_TYPE(mat4x2, GL_FLOAT_MAT4x2, GLSL_TYPE_FLOAT, 2, 4) DECL_TYPE(mat4x3, GL_FLOAT_MAT4x3, GLSL_TYPE_FLOAT, 3, 4) -DECL_TYPE(float16_t, GL_FLOAT16_NV,GLSL_TYPE_FLOAT16, 1, 1) -DECL_TYPE(f16vec2, GL_FLOAT16_VEC2_NV, GLSL_TYPE_FLOAT16, 2, 1) -DECL_TYPE(f16vec3, GL_FLOAT16_VEC3_NV, GLSL_TYPE_FLOAT16, 3, 1) -DECL_TYPE(f16vec4, GL_FLOAT16_VEC4_NV, GLSL_TYPE_FLOAT16, 4, 1) - DECL_TYPE(f16mat2, GL_FLOAT16_MAT2_AMD, GLSL_TYPE_FLOAT16, 2, 2) DECL_TYPE(f16mat3, GL_FLOAT16_MAT3_AMD, GLSL_TYPE_FLOAT16, 3, 3) DECL_TYPE(f16mat4, GL_FLOAT16_MAT4_AMD, GLSL_TYPE_FLOAT16, 4, 4) @@ -78,11 +72,6 @@ DECL_TYPE(f16mat3x4, GL_FLOAT16_MAT3x4_AMD, GLSL_TYPE_FLOAT16, 4, 3) DECL_TYPE(f16mat4x2, GL_FLOAT16_MAT4x2_AMD, GLSL_TYPE_FLOAT16, 2, 4) DECL_TYPE(f16mat4x3, GL_FLOAT16_MAT4x3_AMD, GLSL_TYPE_FLOAT16, 3, 4) 
-DECL_TYPE(double, GL_DOUBLE,GLSL_TYPE_DOUBLE, 1, 1) -DECL_TYPE(dvec2, GL_DOUBLE_VEC2, GLSL_TYPE_DOUBLE, 2, 1) -DECL_TYPE(dvec3, GL_DOUBLE_VEC3, GLSL_TYPE_DOUBLE, 3, 1) -DECL_TYPE(dvec4, GL_DOUBLE_VEC4, GLSL_TYPE_DOUBLE, 4, 1) - DECL_TYPE(dmat2, GL_DOUBLE_MAT2, GLSL_TYPE_DOUBLE, 2, 2) DECL_TYPE(dmat3, GL_DOUBLE_MAT3, GLSL_TYPE_DOUBLE, 3, 3) DECL_TYPE(dmat4, GL_DOUBLE_MAT4, GLSL_TYPE_DOUBLE, 4, 4) @@ -94,36 +83,6 @@ DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3) DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4) DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4) -DECL_TYPE(int64_t, GL_INT64_ARB, GLSL_TYPE_INT64, 1, 1) -DECL_TYPE(i64vec2, GL_INT64_VEC2_ARB, GLSL_TYPE_INT64, 2, 1) -DECL_TYPE(i64vec3, GL_INT64_VEC3_ARB, GLSL_TYPE_INT64, 3, 1) -DECL_TYPE(i64vec4, GL_INT64_VEC4_ARB, G
[Mesa-dev] [PATCH 2/3] glsl_types: vec8/vec16 support
Not used in GL but 8 and 16 component vectors exist in OpenCL. Signed-off-by: Rob Clark --- OpenCL committee: "Sure everyone switched to scalar instruction sets, but let's double down on the vec4" :-P src/compiler/builtin_type_macros.h | 4 +++- src/compiler/glsl_types.cpp| 8 +++- src/compiler/nir/nir_print.c | 4 +++- src/compiler/nir/nir_validate.c| 4 +++- src/compiler/nir_types.cpp | 10 ++ src/compiler/spirv/spirv_to_nir.c | 3 +-- 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/compiler/builtin_type_macros.h b/src/compiler/builtin_type_macros.h index dd8204a1981..55ad2b89554 100644 --- a/src/compiler/builtin_type_macros.h +++ b/src/compiler/builtin_type_macros.h @@ -35,7 +35,9 @@ DECL_TYPE(void, GL_INVALID_ENUM, GLSL_TYPE_VOID, 0, 0) DECL_TYPE(stype, etype ##__VA_ARGS__, btype, 1, 1) \ DECL_TYPE(vtype ## 2, etype ##_VEC2 ##__VA_ARGS__, btype, 2, 1) \ DECL_TYPE(vtype ## 3, etype ##_VEC3 ##__VA_ARGS__, btype, 3, 1) \ - DECL_TYPE(vtype ## 4, etype ##_VEC4 ##__VA_ARGS__, btype, 4, 1) + DECL_TYPE(vtype ## 4, etype ##_VEC4 ##__VA_ARGS__, btype, 4, 1) \ + DECL_TYPE(vtype ## 8, 0, btype, 8, 1) \ + DECL_TYPE(vtype ## 16, 0, btype, 16, 1) DECL_VEC_TYPE(bool, bvec, GLSL_TYPE_BOOL,GL_BOOL) DECL_VEC_TYPE(int, ivec, GLSL_TYPE_INT, GL_INT) diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 8b18f2f3210..b8caddb4066 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -498,7 +498,12 @@ glsl_type::vec(unsigned components, const glsl_type *const ts[]) { unsigned n = components; - if (n == 0 || n > 4) + if (components == 8) + n = 5; + else if (components == 16) + n = 6; + + if (n == 0 || n > 6) return error_type; return ts[n - 1]; @@ -508,6 +513,7 @@ glsl_type::vec(unsigned components, const glsl_type *const ts[]) static const glsl_type *const ts[] = { \ sname ## _type, vname ## 2_type,\ vname ## 3_type, vname ## 4_type, \ + vname ## 8_type, vname ## 16_type, \ }; \ glsl_type::vec(components, ts);\ }) diff --git 
a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 7888dbd3384..21f13097651 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -85,7 +85,9 @@ print_register(nir_register *reg, print_state *state) fprintf(fp, "r%u", reg->index); } -static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" }; +static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4", + "error", "error", "error", "vec8", + "error", "error", "error", "vec16"}; static void print_register_decl(nir_register *reg, print_state *state) diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index a49948fbb48..725ba43152c 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -294,7 +294,9 @@ validate_ssa_def(nir_ssa_def *def, validate_state *state) validate_assert(state, def->parent_instr == state->instr); - validate_assert(state, def->num_components <= 4); + validate_assert(state, (def->num_components <= 4) || + (def->num_components == 8) || + (def->num_components == 16)); list_validate(&def->uses); list_validate(&def->if_uses); diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index ee6b06aea63..78b66803f08 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -366,15 +366,17 @@ glsl_scalar_type(enum glsl_base_type base_type) const glsl_type * glsl_vector_type(enum glsl_base_type base_type, unsigned components) { - assert(components > 1 && components <= 4); - return glsl_type::get_instance(base_type, components, 1); + const glsl_type *t = glsl_type::get_instance(base_type, components, 1); + assert(t != glsl_type::error_type); + return t; } const glsl_type * glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns) { - assert(rows > 1 && rows <= 4 && columns >= 1 && columns <= 4); - return glsl_type::get_instance(base_type, rows, columns); + const glsl_type *t = glsl_type::get_instance(base_type, rows, columns); + 
assert(t != glsl_type::error_type); + return t; } const glsl_type * diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 42a559122a6..953c9b86c3a 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -934,7 +934,6 @@ vtn_type_layout_std430(struct vtn_builder *b, struct vtn_type *type, case vtn_base_type_vector: { uint32_t comp_size = glsl_get_bit_size(type->type) / 8; - assert(type->length > 0 && type->length <= 4); unsigned