[Mesa-dev] [RFC v2] nir: Add a uniformity analysis pass
v2: Start with everything uniform and mark non-uniformity. This is required in order to properly handle loops. Cc: Kenneth Graunke Cc: Connor Abbott --- src/compiler/Makefile.sources | 1 + src/compiler/nir/nir.c| 3 + src/compiler/nir/nir.h| 18 +++ src/compiler/nir/nir_analyze_uniformity.c | 188 ++ 4 files changed, 210 insertions(+) create mode 100644 src/compiler/nir/nir_analyze_uniformity.c diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 2455d4e..407197c 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -186,6 +186,7 @@ NIR_GENERATED_FILES = \ NIR_FILES = \ nir/nir.c \ nir/nir.h \ + nir/nir_analyze_uniformity.c \ nir/nir_array.h \ nir/nir_builder.h \ nir/nir_clone.c \ diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index a9fac96..fd64ccd 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -357,6 +357,8 @@ nir_block_create(nir_shader *shader) exec_list_make_empty(&block->instr_list); + block->uniform = false; + return block; } @@ -1518,6 +1520,7 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, list_inithead(&def->if_uses); def->num_components = num_components; def->bit_size = bit_size; + def->uniform = false; if (instr->block) { nir_function_impl *impl = diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index dd1e407..4da318d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -462,6 +462,14 @@ typedef struct nir_ssa_def { /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ uint8_t bit_size; + + /** True if this SSA def has the same value for all invocations +* +* An SSA def can be uniform even if it is defined in non-uniform +* control-flow. In this case, it has the same value for all invocations +* which reach its definition. 
+*/ + bool uniform; } nir_ssa_def; struct nir_src; @@ -1480,6 +1488,14 @@ typedef struct nir_block { /** generic block index; generated by nir_index_blocks */ unsigned index; + /** True if this block is only executed uniformly +* +* A block is said to execute uniformly if, whenever one invocation enters +* the block, all invocations enter the block and have all taken the same +* path from the start block to this block. +*/ + bool uniform; + /* * Each block can only have up to 2 successors, so we put them in a simple * array - no need for anything more complicated. @@ -2567,6 +2583,8 @@ bool nir_normalize_cubemap_coords(nir_shader *shader); void nir_live_ssa_defs_impl(nir_function_impl *impl); +void nir_analyze_uniformity(nir_shader *shader); + void nir_loop_analyze_impl(nir_function_impl *impl, nir_variable_mode indirect_mask); diff --git a/src/compiler/nir/nir_analyze_uniformity.c b/src/compiler/nir/nir_analyze_uniformity.c new file mode 100644 index 000..42693f3 --- /dev/null +++ b/src/compiler/nir/nir_analyze_uniformity.c @@ -0,0 +1,188 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" + +/** + * Uniform analysis + */ + +static bool +src_is_uniform(nir_src *src, void *state) +{ + return src->is_ssa && src->ssa->uniform; +} + +static bool +mark_def_non_uniform(nir_ssa_def *def, void *state) +{ + bool *progress = state; + + if (def->uniform) { + *progress = true; + def->uniform = false; + } + + return true; +} + +static void +mark_block_non_uniform(nir_block *block, bool *progress) +{ + if (block->uniform) { + *progress = true; + block->uniform = false; + } +} + +static bool +block_all_predecessors_uniform(nir_block *block) +{ + struct set_entry *entry; + set_for
Re: [Mesa-dev] [PATCH] ralloc: don't leave out the alignment factor
I've spent the whole night tracking down this bug; I thought it was an issue with a recent upgrade of binutils. Please add my: Tested-by: Mike Lothian On Sat, 4 Mar 2017 at 00:49 Grazvydas Ignotas wrote: > Experimentation shows that without alignment factor gcc and clang choose > a factor of 16 even on IA-32, which doesn't match what malloc() uses (8). > The problem is it makes gcc assume the pointer is 16 byte aligned, so > with -O3 it starts using aligned SSE instructions that later fault, > so always specify a suitable alignment factor. > > Cc: Jonas Pfeil > Fixes: cd2b55e5 "ralloc: Make sure ralloc() allocations match malloc()'s > alignment." > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100049 > Signed-off-by: Grazvydas Ignotas > --- > no commit access > > src/util/ralloc.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/src/util/ralloc.c b/src/util/ralloc.c > index 03283de..7bf192e 100644 > --- a/src/util/ralloc.c > +++ b/src/util/ralloc.c > @@ -59,8 +59,10 @@ _CRTIMP int _vscprintf(const char *format, va_list > argptr); > struct > #ifdef _MSC_VER > __declspec(align(8)) > +#elif defined(__LP64__) > + __attribute__((aligned(16))) > #else > - __attribute__((aligned)) > + __attribute__((aligned(8))) > #endif > ralloc_header > { > -- > 2.7.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC] nir: Add a uniformity analysis pass
Cc: Kenneth Graunke Cc: Connor Abbott --- src/compiler/Makefile.sources | 1 + src/compiler/nir/nir.c| 3 + src/compiler/nir/nir.h| 18 +++ src/compiler/nir/nir_analyze_uniformity.c | 211 ++ 4 files changed, 233 insertions(+) create mode 100644 src/compiler/nir/nir_analyze_uniformity.c diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 2455d4e..407197c 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -186,6 +186,7 @@ NIR_GENERATED_FILES = \ NIR_FILES = \ nir/nir.c \ nir/nir.h \ + nir/nir_analyze_uniformity.c \ nir/nir_array.h \ nir/nir_builder.h \ nir/nir_clone.c \ diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index a9fac96..fd64ccd 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -357,6 +357,8 @@ nir_block_create(nir_shader *shader) exec_list_make_empty(&block->instr_list); + block->uniform = false; + return block; } @@ -1518,6 +1520,7 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, list_inithead(&def->if_uses); def->num_components = num_components; def->bit_size = bit_size; + def->uniform = false; if (instr->block) { nir_function_impl *impl = diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index dd1e407..4da318d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -462,6 +462,14 @@ typedef struct nir_ssa_def { /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ uint8_t bit_size; + + /** True if this SSA def has the same value for all invocations +* +* An SSA def can be uniform even if it is defined in non-uniform +* control-flow. In this case, it has the same value for all invocations +* which reach its definition. 
+*/ + bool uniform; } nir_ssa_def; struct nir_src; @@ -1480,6 +1488,14 @@ typedef struct nir_block { /** generic block index; generated by nir_index_blocks */ unsigned index; + /** True if this block is only executed uniformly +* +* A block is said to execute uniformly if, whenever one invocation enters +* the block, all invocations enter the block and have all taken the same +* path from the start block to this block. +*/ + bool uniform; + /* * Each block can only have up to 2 successors, so we put them in a simple * array - no need for anything more complicated. @@ -2567,6 +2583,8 @@ bool nir_normalize_cubemap_coords(nir_shader *shader); void nir_live_ssa_defs_impl(nir_function_impl *impl); +void nir_analyze_uniformity(nir_shader *shader); + void nir_loop_analyze_impl(nir_function_impl *impl, nir_variable_mode indirect_mask); diff --git a/src/compiler/nir/nir_analyze_uniformity.c b/src/compiler/nir/nir_analyze_uniformity.c new file mode 100644 index 000..e6e1f93 --- /dev/null +++ b/src/compiler/nir/nir_analyze_uniformity.c @@ -0,0 +1,211 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" + +/** + * Uniform analysis + */ + +static bool +src_is_uniform(nir_src *src, void *state) +{ + return src->is_ssa && src->ssa->uniform; +} + +static bool +mark_def_uniform(nir_ssa_def *def, void *state) +{ + bool *progress = state; + + if (!def->uniform) { + *progress = true; + def->uniform = true; + } + + return true; +} + +static void +analyze_uniformity_block(nir_block *block, bool *progress) +{ + nir_foreach_instr(instr, block) { + switch (instr->type) { + case nir_instr_type_load_const: + /* load_const is always uniform */ + mark_def_uniform(&nir_instr_as_load_const(instr)->def, progress); + break; + + case nir_instr_type_phi: { + /* For phis we need to take
[Mesa-dev] [PATCH 7/8] nir: Make image_size a variable-width intrinsic
--- src/compiler/glsl/glsl_to_nir.cpp | 7 --- src/compiler/nir/nir_intrinsics.h | 2 +- src/compiler/spirv/spirv_to_nir.c | 18 +++--- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index fc2a2c4..3a71afd 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -865,10 +865,11 @@ nir_visitor::visit(ir_call *ir) /* Set the intrinsic destination. */ if (ir->return_deref) { -const nir_intrinsic_info *info = -&nir_intrinsic_infos[instr->intrinsic]; +unsigned num_components = ir->return_deref->type->vector_elements; +if (instr->intrinsic == nir_intrinsic_image_size) + instr->num_components = num_components; nir_ssa_dest_init(&instr->instr, &instr->dest, - info->dest_components, 32, NULL); + num_components, 32, NULL); } if (op == nir_intrinsic_image_size || diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 5c8f283..105c56f 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -186,7 +186,7 @@ INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) -INTRINSIC(image_size, 0, ARR(0), true, 4, 1, 0, xx, xx, xx, +INTRINSIC(image_size, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) INTRINSIC(image_samples, 0, ARR(0), true, 1, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index fa28576..356f19d 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1997,17 +1997,21 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, if (opcode != 
SpvOpImageWrite) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL); + + unsigned dest_components = + nir_intrinsic_infos[intrin->intrinsic].dest_components; + if (intrin->intrinsic == nir_intrinsic_image_size) { + dest_components = intrin->num_components = +glsl_get_vector_elements(type->type); + } + + nir_ssa_dest_init(&intrin->instr, &intrin->dest, +dest_components, 32, NULL); nir_builder_instr_insert(&b->nb, &intrin->instr); - /* The image intrinsics always return 4 channels but we may not want - * that many. Emit a mov to trim it down. - */ - unsigned swiz[4] = {0, 1, 2, 3}; val->ssa = vtn_create_ssa_value(b, type->type); - val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz, - glsl_get_vector_elements(type->type), false); + val->ssa->def = &intrin->dest.ssa; } else { nir_builder_instr_insert(&b->nb, &intrin->instr); } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/8] nir: Require numbers of components to always match
+correct connor On Fri, Mar 3, 2017 at 5:12 PM, Jason Ekstrand wrote: > When NIR was first created, we were a bit lazy about numbers of components. > The rule was that a source couldn't consume more components than the thing > it was reading from. However, this leads to a lot of confusion because you > now have a thing sourcing from a vec4 but only reading two of the > components. > > The solution to this is to disallow that case and require that the number > of components always match. The one exception is ALU instructions because > they're designed to naturally swizzle things around like mad. We already > require this restriction for phi instructions. This series adds it for > intrinsics, texture instructions, and deref indirects. > > Cc: Kenneth Graunke > Cc: Connor Abbott > > Jason Ekstrand (8): > nir/intrinsics: Make load_barycentric_input take a 2-component coor > nir/copy_prop: Respect the source's number of components > nir/spirv: Restrict the number of channels in texture coordinates > nir/lower_tex: Use tex_instr_dest_size for txs destinations > anv/apply_dynamic_offsets: Only use one channel for computed offsets > i965/fs: Use num_components from the SSA def in image intrinsics > nir: Make image_size a variable-width intrinsic > nir/validate: Validate that bit sizes and components always match > > src/compiler/glsl/glsl_to_nir.cpp| 7 +- > src/compiler/nir/nir_intrinsics.h| 6 +- > src/compiler/nir/nir_lower_tex.c | 3 +- > src/compiler/nir/nir_opt_copy_propagate.c| 129 > +-- > src/compiler/nir/nir_validate.c | 101 +++--- > src/compiler/spirv/spirv_to_nir.c| 21 ++-- > src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 3 +- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 +- > 8 files changed, 185 insertions(+), 88 deletions(-) > > -- > 2.5.0.400.gff86faf > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/8] nir/intrinsics: Make load_barycentric_input take a 2-component coord
Cc: "17.0 13.0" --- src/compiler/nir/nir_intrinsics.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index f45bfe2..5c8f283 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -401,7 +401,9 @@ LOAD(input, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINS /* src[] = { vertex, offset }. const_index[] = { base, component } */ LOAD(per_vertex_input, 2, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { barycoord, offset }. const_index[] = { base, component } */ -LOAD(interpolated_input, 2, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +INTRINSIC(load_interpolated_input, 2, ARR(2, 1), true, 0, 0, + 2, BASE, COMPONENT, xx, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { buffer_index, offset }. No const_index */ LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/8] i965/fs: Use num_components from the SSA def in image intrinsics
--- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 3d5967a..fc85f0e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -3760,8 +3760,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr type->sampler_array; /* Copy all the components. */ - const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; - for (unsigned c = 0; c < info->dest_components; ++c) { + for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) { if ((int)c >= type->coordinate_components()) { bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), brw_imm_d(1)); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/8] nir: Require numbers of components to always match
When NIR was first created, we were a bit lazy about numbers of components. The rule was that a source couldn't consume more components than the thing it was reading from. However, this leads to a lot of confusion because you now have a thing sourcing from a vec4 but only reading two of the components. The solution to this is to disallow that case and require that the number of components always match. The one exception is ALU instructions because they're designed to naturally swizzle things around like mad. We already require this restriction for phi instructions. This series adds it for intrinsics, texture instructions, and deref indirects. Cc: Kenneth Graunke Cc: Connor Abbott Jason Ekstrand (8): nir/intrinsics: Make load_barycentric_input take a 2-component coor nir/copy_prop: Respect the source's number of components nir/spirv: Restrict the number of channels in texture coordinates nir/lower_tex: Use tex_instr_dest_size for txs destinations anv/apply_dynamic_offsets: Only use one channel for computed offsets i965/fs: Use num_components from the SSA def in image intrinsics nir: Make image_size a variable-width intrinsic nir/validate: Validate that bit sizes and components always match src/compiler/glsl/glsl_to_nir.cpp| 7 +- src/compiler/nir/nir_intrinsics.h| 6 +- src/compiler/nir/nir_lower_tex.c | 3 +- src/compiler/nir/nir_opt_copy_propagate.c| 129 +-- src/compiler/nir/nir_validate.c | 101 +++--- src/compiler/spirv/spirv_to_nir.c| 21 ++-- src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 3 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 +- 8 files changed, 185 insertions(+), 88 deletions(-) -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/8] nir/spirv: Restrict the number of channels in texture coordinates
Some SPIR-V texturing instructions pack more than the texture coordinate into the coordinate source. We need to mask off the unused channels. --- src/compiler/spirv/spirv_to_nir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 1dd6651..fa28576 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1567,7 +1567,8 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, coord_components++; coord = vtn_ssa_value(b, w[idx++])->def; - p->src = nir_src_for_ssa(coord); + p->src = nir_src_for_ssa(nir_channels(&b->nb, coord, +(1 << coord_components) - 1)); p->src_type = nir_tex_src_coord; p++; break; -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/8] nir/copy_prop: Respect the source's number of components
Because we suddenly have to know how many components each source has, this makes the pass a bit more complicated. Fortunately, copy propagation is the only pass that cares about how many components are read by any given source, so it's fairly contained. Shader-db results on Sky Lake: total instructions in shared programs: 13318947 -> 13320265 (0.01%) instructions in affected programs: 260633 -> 261951 (0.51%) helped: 324 HURT: 1027 Looking through the hurt programs, about a dozen are hurt by 3 instructions and the rest are all hurt by 2 instructions. From a spot-check of the shaders, the story is always the same: They get a vec4 from somewhere (frequently an input) and use the first two or three components as a texture coordinate. Because of the vector component mismatch, we have a mov or, more likely, a vecN sitting between the texture instruction and the input. This means that the back-end inserts a bunch of MOVs and split_virtual_grfs() goes to town. Because the texture coordinate is also used by some other calculation, register coalesce can't combine them back together and we end up with an extra 2 MOV instructions in our shader. 
--- src/compiler/nir/nir_opt_copy_propagate.c | 129 ++ 1 file changed, 96 insertions(+), 33 deletions(-) diff --git a/src/compiler/nir/nir_opt_copy_propagate.c b/src/compiler/nir/nir_opt_copy_propagate.c index c26e07f..c4001fa 100644 --- a/src/compiler/nir/nir_opt_copy_propagate.c +++ b/src/compiler/nir/nir_opt_copy_propagate.c @@ -99,11 +99,12 @@ is_swizzleless_move(nir_alu_instr *instr) } static bool -copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) +copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if, + unsigned num_components) { if (!src->is_ssa) { if (src->reg.indirect) - return copy_prop_src(src->reg.indirect, parent_instr, parent_if); + return copy_prop_src(src->reg.indirect, parent_instr, parent_if, 1); return false; } @@ -115,17 +116,8 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) if (!is_swizzleless_move(alu_instr)) return false; - /* Don't let copy propagation land us with a phi that has more -* components in its source than it has in its destination. That badly -* messes up out-of-ssa. 
-*/ - if (parent_instr && parent_instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(parent_instr); - assert(phi->dest.is_ssa); - if (phi->dest.ssa.num_components != - alu_instr->src[0].src.ssa->num_components) - return false; - } + if (alu_instr->src[0].src.ssa->num_components != num_components) + return false; if (parent_instr) { nir_instr_rewrite_src(parent_instr, src, @@ -146,7 +138,7 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) if (!src->src.is_ssa) { if (src->src.reg.indirect) return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr, - NULL); + NULL, 1); return false; } @@ -193,51 +185,122 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) return true; } -typedef struct { - nir_instr *parent_instr; - bool progress; -} copy_prop_state; +static bool +copy_prop_dest(nir_dest *dest, nir_instr *instr) +{ + if (!dest->is_ssa && dest->reg.indirect) + return copy_prop_src(dest->reg.indirect, instr, NULL, 1); + + return false; +} static bool -copy_prop_src_cb(nir_src *src, void *_state) +copy_prop_deref_var(nir_instr *instr, nir_deref_var *deref_var) { - copy_prop_state *state = (copy_prop_state *) _state; - while (copy_prop_src(src, state->parent_instr, NULL)) - state->progress = true; + if (!deref_var) + return false; - return true; + bool progress = false; + for (nir_deref *deref = deref_var->deref.child; +deref; deref = deref->child) { + if (deref->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *arr = nir_deref_as_array(deref); + if (arr->deref_array_type != nir_deref_array_type_indirect) + continue; + + while (copy_prop_src(&arr->indirect, instr, NULL, 1)) + progress = true; + } + return progress; } static bool copy_prop_instr(nir_instr *instr) { - if (instr->type == nir_instr_type_alu) { + bool progress = false; + switch (instr->type) { + case nir_instr_type_alu: { nir_alu_instr *alu_instr = nir_instr_as_alu(instr); - bool progress = false; for (unsigned i = 0; 
i < nir_op_infos[alu_instr->op].num_inputs; i++) while (copy_prop_alu_src(alu_instr, i)) progress = true; - if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect) - while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL)) + while (copy_prop_dest(&alu_instr->d
[Mesa-dev] [PATCH 8/8] nir/validate: Validate that bit sizes and components always match
--- src/compiler/nir/nir_validate.c | 101 +--- 1 file changed, 63 insertions(+), 38 deletions(-) diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index 15ba65f..16efcb2 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -126,10 +126,12 @@ log_error(validate_state *state, const char *cond, const char *file, int line) log_error(state, #cond, __FILE__, __LINE__); \ } while (0) -static void validate_src(nir_src *src, validate_state *state); +static void validate_src(nir_src *src, validate_state *state, + unsigned bit_size, unsigned num_components); static void -validate_reg_src(nir_src *src, validate_state *state) +validate_reg_src(nir_src *src, validate_state *state, + unsigned bit_size, unsigned num_components) { validate_assert(state, src->reg.reg != NULL); @@ -151,6 +153,13 @@ validate_reg_src(nir_src *src, validate_state *state) "using a register declared in a different function"); } + if (!src->reg.reg->is_packed) { + if (bit_size) + validate_assert(state, src->reg.reg->bit_size == bit_size); + if (num_components) + validate_assert(state, src->reg.reg->num_components == num_components); + } + validate_assert(state, (src->reg.reg->num_array_elems == 0 || src->reg.base_offset < src->reg.reg->num_array_elems) && "definitely out-of-bounds array access"); @@ -160,12 +169,13 @@ validate_reg_src(nir_src *src, validate_state *state) validate_assert(state, (src->reg.indirect->is_ssa || src->reg.indirect->reg.indirect == NULL) && "only one level of indirection allowed"); - validate_src(src->reg.indirect, state); + validate_src(src->reg.indirect, state, 32, 1); } } static void -validate_ssa_src(nir_src *src, validate_state *state) +validate_ssa_src(nir_src *src, validate_state *state, + unsigned bit_size, unsigned num_components) { validate_assert(state, src->ssa != NULL); @@ -188,11 +198,17 @@ validate_ssa_src(nir_src *src, validate_state *state) _mesa_set_add(def_state->if_uses, src); } + if (bit_size) + 
validate_assert(state, src->ssa->bit_size == bit_size); + if (num_components) + validate_assert(state, src->ssa->num_components == num_components); + /* TODO validate that the use is dominated by the definition */ } static void -validate_src(nir_src *src, validate_state *state) +validate_src(nir_src *src, validate_state *state, + unsigned bit_size, unsigned num_components) { if (state->instr) validate_assert(state, src->parent_instr == state->instr); @@ -200,9 +216,9 @@ validate_src(nir_src *src, validate_state *state) validate_assert(state, src->parent_if == state->if_stmt); if (src->is_ssa) - validate_ssa_src(src, state); + validate_ssa_src(src, state, bit_size, num_components); else - validate_reg_src(src, state); + validate_reg_src(src, state, bit_size, num_components); } static void @@ -247,11 +263,12 @@ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state) } } - validate_src(&src->src, state); + validate_src(&src->src, state, 0, 0); } static void -validate_reg_dest(nir_reg_dest *dest, validate_state *state) +validate_reg_dest(nir_reg_dest *dest, validate_state *state, + unsigned bit_size, unsigned num_components) { validate_assert(state, dest->reg != NULL); @@ -270,6 +287,13 @@ validate_reg_dest(nir_reg_dest *dest, validate_state *state) "writing to a register declared in a different function"); } + if (!dest->reg->is_packed) { + if (bit_size) + validate_assert(state, dest->reg->bit_size == bit_size); + if (num_components) + validate_assert(state, dest->reg->num_components == num_components); + } + validate_assert(state, (dest->reg->num_array_elems == 0 || dest->base_offset < dest->reg->num_array_elems) && "definitely out-of-bounds array access"); @@ -278,7 +302,7 @@ validate_reg_dest(nir_reg_dest *dest, validate_state *state) validate_assert(state, dest->reg->num_array_elems != 0); validate_assert(state, (dest->indirect->is_ssa || dest->indirect->reg.indirect == NULL) && "only one level of indirection allowed"); - 
validate_src(dest->indirect, state); + validate_src(dest->indirect, state, 32, 1); } } @@ -307,12 +331,18 @@ validate_ssa_def(nir_ssa_def *def, validate_state *state) } static void -validate_dest(nir_dest *dest, validate_state *state) +validate_dest(nir_dest *dest, validate_state *state, + unsigned bit_size, unsigned num_components) { - if (dest->is_ssa) + if (dest->is_ssa) { + if (bit_size) + validate_assert(state, dest->ssa.bit_size == bit_size); +
[Mesa-dev] [PATCH 5/8] anv/apply_dynamic_offsets: Only use one channel for computed offsets
The offset source of the UBO and SSBO intrinsics is only one channel. --- src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c index 80ef8ee..f0f33ec 100644 --- a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c @@ -83,7 +83,8 @@ apply_dynamic_offsets_block(nir_block *block, nir_builder *b, nir_src *offset_src = nir_get_io_offset_src(intrin); nir_ssa_def *old_offset = nir_ssa_for_src(b, *offset_src, 1); - nir_ssa_def *new_offset = nir_iadd(b, old_offset, &offset_load->dest.ssa); + nir_ssa_def *new_offset = + nir_iadd(b, old_offset, nir_channel(b, &offset_load->dest.ssa, 0)); nir_instr_rewrite_src(&intrin->instr, offset_src, nir_src_for_ssa(new_offset)); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/8] nir/lower_tex: Use tex_instr_dest_size for txs destinations
Using coord_components of the source texture is correct for everything except cube maps where it's off by one. --- src/compiler/nir/nir_lower_tex.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 66e2317..213406a 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -172,7 +172,8 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex) txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0)); txs->src[0].src_type = nir_tex_src_lod; - nir_ssa_dest_init(&txs->instr, &txs->dest, tex->coord_components, 32, NULL); + nir_ssa_dest_init(&txs->instr, &txs->dest, + nir_tex_instr_dest_size(txs), 32, NULL); nir_builder_instr_insert(b, &txs->instr); return nir_i2f(b, &txs->dest.ssa); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] anv/blorp: Only set a clear color for resolves if fast-cleared
Cc: "17.0" --- src/intel/vulkan/anv_blorp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index d79c5e0..72a5980 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1489,7 +1489,8 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer, struct blorp_surf surf; get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, att_state->aux_usage, &surf); - surf.clear_color = vk_to_isl_color(att_state->clear_value.color); + if (att_state->fast_clear) + surf.clear_color = vk_to_isl_color(att_state->clear_value.color); /* From the Sky Lake PRM Vol. 7, "Render Target Resolve": * -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 24/24] docs: mark GL_ARB_gpu_shader_fp64 and OpenGL 4.0 as supported by i965/gen7+
Samuel Iglesias Gonsálvez writes: > Signed-off-by: Samuel Iglesias Gonsálvez Acked-by: Francisco Jerez > --- > docs/features.txt | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/docs/features.txt b/docs/features.txt > index 5905dba9b39..bb2bf884626 100644 > --- a/docs/features.txt > +++ b/docs/features.txt > @@ -107,7 +107,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, > radeonsi, llvmpipe, soft >GL_ARB_vertex_type_2_10_10_10_rev DONE (freedreno, swr) > > > -GL 4.0, GLSL 4.00 --- all DONE: i965/hsw+, nvc0, r600, radeonsi > +GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, radeonsi > >GL_ARB_draw_buffers_blend DONE (freedreno, > i965/gen6+, nv50, llvmpipe, softpipe, swr) >GL_ARB_draw_indirect DONE (i965/gen7+, > llvmpipe, softpipe, swr) > @@ -124,7 +124,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/hsw+, nvc0, r600, > radeonsi >- Enhanced per-sample shading DONE () >- Interpolation functions DONE () >- New overload resolution rules DONE > - GL_ARB_gpu_shader_fp64DONE (i965/hsw+, > llvmpipe, softpipe) > + GL_ARB_gpu_shader_fp64DONE (i965/gen7+, > llvmpipe, softpipe) >GL_ARB_sample_shading DONE (i965/gen6+, > nv50) >GL_ARB_shader_subroutine DONE (i965/gen6+, > nv50, llvmpipe, softpipe, swr) >GL_ARB_tessellation_shaderDONE (i965/gen7+) > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 23/24] i965: enable OpenGL 4.0 to Ivybridge/Baytrail
Samuel Iglesias Gonsálvez writes: > Signed-off-by: Samuel Iglesias Gonsálvez Reviewed-by: Francisco Jerez > --- > src/mesa/drivers/dri/i965/intel_extensions.c | 2 ++ > src/mesa/drivers/dri/i965/intel_screen.c | 6 -- > 2 files changed, 6 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c > b/src/mesa/drivers/dri/i965/intel_extensions.c > index 9d4b109ac3f..05bc5b92187 100644 > --- a/src/mesa/drivers/dri/i965/intel_extensions.c > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c > @@ -138,6 +138,8 @@ intelInitExtensions(struct gl_context *ctx) >ctx->Const.GLSLVersion = 450; > else if (brw->is_haswell && can_do_pipelined_register_writes(brw->screen)) >ctx->Const.GLSLVersion = 450; > + else if (brw->gen >= 7 && can_do_pipelined_register_writes(brw->screen)) > + ctx->Const.GLSLVersion = 400; > else if (brw->gen >= 6) >ctx->Const.GLSLVersion = 330; > else > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c > b/src/mesa/drivers/dri/i965/intel_screen.c > index 83b1f3ff556..1afa74a24d6 100644 > --- a/src/mesa/drivers/dri/i965/intel_screen.c > +++ b/src/mesa/drivers/dri/i965/intel_screen.c > @@ -1545,8 +1545,10 @@ set_max_gl_versions(struct intel_screen *screen) >dri_screen->max_gl_es2_version = has_astc ? 32 : 31; >break; > case 7: > - dri_screen->max_gl_core_version = screen->devinfo.is_haswell && > - can_do_pipelined_register_writes(screen) ? 45 : 33; > + if (can_do_pipelined_register_writes(screen)) > + dri_screen->max_gl_core_version = screen->devinfo.is_haswell ? 45 : > 40; > + else > + dri_screen->max_gl_core_version = 33; >dri_screen->max_gl_compat_version = 30; >dri_screen->max_gl_es1_version = 11; >dri_screen->max_gl_es2_version = screen->devinfo.is_haswell ? 
31 : 30; > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 22/24] i965: enable ARB_gpu_shader_fp64 for Ivybridge/Baytrail
Samuel Iglesias Gonsálvez writes: > Signed-off-by: Samuel Iglesias Gonsálvez Reviewed-by: Francisco Jerez > --- > src/mesa/drivers/dri/i965/intel_extensions.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c > b/src/mesa/drivers/dri/i965/intel_extensions.c > index f1290bf7b49..9d4b109ac3f 100644 > --- a/src/mesa/drivers/dri/i965/intel_extensions.c > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c > @@ -204,6 +204,7 @@ intelInitExtensions(struct gl_context *ctx) >ctx->Extensions.ARB_derivative_control = true; >ctx->Extensions.ARB_framebuffer_no_attachments = true; >ctx->Extensions.ARB_gpu_shader5 = true; > + ctx->Extensions.ARB_gpu_shader_fp64 = true; >ctx->Extensions.ARB_shader_atomic_counters = true; >ctx->Extensions.ARB_shader_atomic_counter_ops = true; >ctx->Extensions.ARB_shader_clock = true; > @@ -237,7 +238,6 @@ intelInitExtensions(struct gl_context *ctx) > } > > if (brw->gen >= 8 || brw->is_haswell) { > - ctx->Extensions.ARB_gpu_shader_fp64 = true; >ctx->Extensions.ARB_shader_precision = true; >ctx->Extensions.ARB_stencil_texturing = true; >ctx->Extensions.ARB_texture_stencil8 = true; > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 21/24] i965: Use correct VertStride on align16 instructions.
Samuel Iglesias Gonsálvez writes: > From: Matt Turner > > In commit c35fa7a, we changed the "width" of DF source registers to 2, > which is conceptually fine. Unfortunately a VertStride of 2 is not > allowed by align16 instructions on IVB/BYT, and the regular VertStride > of 4 works fine in any case. > > See > generated_tests/spec/arb_gpu_shader_fp64/execution/built-in-functions/vs-round-double.shader_test > for example: > > cmp.ge.f0(8)g18<1>DFg1<0>.xyxyDF-g8<2>DF{ align16 1Q > }; > ERROR: In Align16 mode, only VertStride of 0 or 4 is allowed > cmp.ge.f0(8)g19<1>DFg1<0>.xyxyDF-g9<2>DF{ align16 2N > }; > ERROR: In Align16 mode, only VertStride of 0 or 4 is allowed > > v2: > - Add spec quote (Curro). > - Change the condition to only BRW_VERTICAL_STRIDE_2 (Curro) > > Reviewed-by: Samuel Iglesias Gonsálvez > --- > src/mesa/drivers/dri/i965/brw_eu_emit.c | 44 > + > 1 file changed, 34 insertions(+), 10 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c > b/src/mesa/drivers/dri/i965/brw_eu_emit.c > index 03aaa760163..d221405db4d 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c > @@ -512,13 +512,25 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, > struct brw_reg reg) > brw_inst_set_src0_da16_swiz_w(devinfo, inst, > BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); > > - /* This is an oddity of the fact we're using the same > - * descriptions for registers in align_16 as align_1: > - */ > - if (reg.vstride == BRW_VERTICAL_STRIDE_8) > + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { > +/* This is an oddity of the fact we're using the same > + * descriptions for registers in align_16 as align_1: > + */ > +brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); > + } else if (devinfo->gen == 7 && !devinfo->is_haswell && > +reg.type == BRW_REGISTER_TYPE_DF && > +reg.vstride == BRW_VERTICAL_STRIDE_2) { > +/* From HSW PRM: This workaround is IVB-specific so the HSW PRM quotation doesn't seem 
particularly relevant. With that fixed here and below patch is: Reviewed-by: Francisco Jerez > + * > + * "For Align16 access mode, only encodings of , 0010 > + * and 0011 are allowed. Other codes are reserved." > + * > + * Presumably the DevSNB behavior applies to IVB as well. > + */ > brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); > - else > + } else { > brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); > + } >} > } > } > @@ -594,13 +606,25 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, > struct brw_reg reg) > brw_inst_set_src1_da16_swiz_w(devinfo, inst, > BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); > > - /* This is an oddity of the fact we're using the same > - * descriptions for registers in align_16 as align_1: > - */ > - if (reg.vstride == BRW_VERTICAL_STRIDE_8) > + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { > +/* This is an oddity of the fact we're using the same > + * descriptions for registers in align_16 as align_1: > + */ > +brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); > + } else if (devinfo->gen == 7 && !devinfo->is_haswell && > +reg.type == BRW_REGISTER_TYPE_DF && > +reg.vstride == BRW_VERTICAL_STRIDE_2) { > +/* From HSW PRM: > + * > + * "For Align16 access mode, only encodings of , 0010 > + * and 0011 are allowed. Other codes are reserved." > + * > + * Presumably the DevSNB behavior applies to IVB as well. > + */ > brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); > - else > + } else { > brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); > + } >} > } > } > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 100049] "ralloc: Make sure ralloc() allocations match malloc()'s alignment." causes seg fault in 32bit build
https://bugs.freedesktop.org/show_bug.cgi?id=100049 --- Comment #1 from Grazvydas Ignotas --- Patch sent: https://patchwork.freedesktop.org/patch/142123/ -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] ralloc: don't leave out the alignment factor
Experimentation shows that without an explicit alignment factor, gcc and clang choose a factor of 16 even on IA-32, which doesn't match what malloc() uses (8). The problem is that this makes gcc assume the pointer is 16-byte aligned, so with -O3 it starts using aligned SSE instructions that later fault. Therefore, always specify a suitable alignment factor. Cc: Jonas Pfeil Fixes: cd2b55e5 "ralloc: Make sure ralloc() allocations match malloc()'s alignment." Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100049 Signed-off-by: Grazvydas Ignotas --- no commit access src/util/ralloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/util/ralloc.c b/src/util/ralloc.c index 03283de..7bf192e 100644 --- a/src/util/ralloc.c +++ b/src/util/ralloc.c @@ -59,8 +59,10 @@ _CRTIMP int _vscprintf(const char *format, va_list argptr); struct #ifdef _MSC_VER __declspec(align(8)) +#elif defined(__LP64__) + __attribute__((aligned(16))) #else - __attribute__((aligned)) + __attribute__((aligned(8))) #endif ralloc_header { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 19/24] i965/vec4: fix SIMD-width lowering for CMP/MOV instructions with conditional modifiers
Samuel Iglesias Gonsálvez writes: > From: "Juan A. Suarez Romero" > > When splitting a CMP/MOV instruction with NULL dest, DF sources, and > conditional modifier; we can't use directly the flag registers, as they will > have the wrong results in IVB/BYT after the scalarization. > > Rather, we need to store the result in a temporary register, and then use > that register to set proper the flags values. > > If a MOV has a null destination register and a conditional modifier, it > can be replaced with a CMP against zero with the same conditional > modifier. By doing this replacement, we can do the SIMD lowering > without any problem. > > v2: > - Fix typo (Matt) > > Signed-off-by: Samuel Iglesias Gonsálvez > Signed-off-by: Juan A. Suarez Romero --- > src/mesa/drivers/dri/i965/brw_vec4.cpp | 80 > +++--- > 1 file changed, 74 insertions(+), 6 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index adcde085305..819674e8cb9 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -2177,6 +2177,46 @@ vec4_visitor::lower_simd_width() > * value of the instruction's dst. > */ >bool needs_temp = dst_src_regions_overlap(inst); > + > + /* When splitting instructions with conditional modifiers and NULL > + * dest we can't rely directly on the flags to store the result. > Rather, > + * we need first to enqueue the result in a temporary register, and > then > + * move those values into flags. > + */ > + bool inst_df_dst_null = > + inst->dst.is_null() && get_exec_type_size(inst) == 8 && > + inst->conditional_mod != BRW_CONDITIONAL_NONE; > + > + if (inst_df_dst_null) { > + /* If there are other DF instructions with NULL destination, > + * we need to verify if we can use the temporary register or > + * if we need an extra lowering step. 
> + */ > + assert(inst->opcode == BRW_OPCODE_MOV || > +inst->opcode == BRW_OPCODE_CMP); > + > + /* Replace MOV.XX with null destination with the equivalent CMP.XX > + * with null destination, so we can lower it as explained before. > + */ > + if (inst->opcode == BRW_OPCODE_MOV) { > +vec4_instruction *cmp = > + new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst_null_df(), > + inst->src[0], > + setup_imm_df(0.0, block, inst)); > +cmp->conditional_mod = inst->conditional_mod; > +cmp->exec_size = inst->exec_size; > +cmp->group = inst->group; > +cmp->size_written = inst->size_written; > +inst->insert_before(block, cmp); > +inst->remove(block); > +inst = cmp; > + } > + } > + dst_reg inst_dst; > + if (inst_df_dst_null) > + inst_dst = > +retype(dst_reg(VGRF, alloc.allocate(1)), BRW_REGISTER_TYPE_F); > + >for (unsigned n = 0; n < inst->exec_size / lowered_width; n++) { > unsigned channel_offset = lowered_width * n; > > @@ -2199,7 +2239,7 @@ vec4_visitor::lower_simd_width() > bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE && n > 0); > /* Compute split dst region */ > dst_reg dst; > - if (needs_temp || d2f_pass) { > + if (needs_temp || d2f_pass || inst_df_dst_null) { > unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE); > dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), > inst->dst.type); > @@ -2229,24 +2269,52 @@ vec4_visitor::lower_simd_width() > > inst->insert_before(block, linst); > > + dst_reg d2f_dst; > + if (inst_df_dst_null) { > +unsigned num_regs = DIV_ROUND_UP(lowered_width, > type_sz(BRW_REGISTER_TYPE_F)); > +d2f_dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), > BRW_REGISTER_TYPE_F); > +vec4_instruction *d2f = new(mem_ctx) > vec4_instruction(VEC4_OPCODE_FROM_DOUBLE, d2f_dst, src_reg(dst)); > +d2f->group = channel_offset; > +d2f->exec_size = lowered_width; > +d2f->size_written = lowered_width * type_sz(d2f_dst.type); > +d2f->predicate = inst->predicate; > +inst->insert_before(block, d2f); > + } > + > /* If we used a temporary to store the 
result of the split >* instruction, copy the result to the original destination >*/ > - if (needs_temp || d2f_pass) { > + if (needs_temp || d2f_pass || inst_df_dst_null) { > vec4_instruction *mov; > -if (d2f_pass) > +if (d2f_pass) { > mov = MOV(horiz_offset(inst->dst, n * > type_sz(inst->dst.type)), src_reg(dst)); > -
Re: [Mesa-dev] [PATCH v3 20/24] i965/vec4: Fix exec size for MOVs SET_{HIGH, LOW}_32BIT.
Samuel Iglesias Gonsálvez writes: > From: Matt Turner > > Otherwise for a pack_double_2x32_split opcode, we emit: > >vec1 64 ssa_135 = pack_double_2x32_split ssa_133, ssa_134 > mov(8) g5<1>UD g5<4>.xUD { align16 1Q > compacted }; > mov(8) g7<2>UD g5<4,4,1>UD { align1 1Q }; > ERROR: When the destination spans two registers, the source must span > two registers >(exceptions for scalar source and packed-word to packed-dword > expansion) > mov(8) g8<2>UD g5.4<4,4,1>UD { align1 2N }; > ERROR: The offset from the two source registers must be the same > mov(8) g5<1>UD g6<4>.xUD { align16 1Q > compacted }; > mov(8) g7.1<2>UD g5<4,4,1>UD { align1 1Q }; > ERROR: When the destination spans two registers, the source must span > two registers >(exceptions for scalar source and packed-word to packed-dword > expansion) > mov(8) g8.1<2>UD g5.4<4,4,1>UD { align1 2N }; > ERROR: The offset from the two source registers must be the same > > The intention was to emit mov(4)s for the instructions that have ERROR > annotations. > > See tests/spec/arb_gpu_shader_fp64/execution/vs-isinf-dvec.shader_test > for example. > > Reviewed-by: Samuel Iglesias Gonsálvez > --- > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index b570792badd..f6034bc8b76 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -2025,6 +2025,7 @@ generate_code(struct brw_codegen *p, > assert(type_sz(dst.type) == 8); > > brw_set_default_access_mode(p, BRW_ALIGN_1); > + brw_set_default_exec_size(p, BRW_EXECUTE_4); > NAK, we're missing a bug elsewhere if the exec_size coming in from the IR is not accurate. You don't happen to be doubling the execution size of this single-precision instruction, do you? 
> dst = retype(dst, BRW_REGISTER_TYPE_UD); > if (inst->opcode == VEC4_OPCODE_SET_HIGH_32BIT) > @@ -2037,6 +2038,7 @@ generate_code(struct brw_codegen *p, > src[0].hstride = BRW_HORIZONTAL_STRIDE_1; > brw_MOV(p, dst, src[0]); > > + brw_set_default_exec_size(p, BRW_EXECUTE_8); > brw_set_default_access_mode(p, BRW_ALIGN_16); > break; >} > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 18/24] i965/vec4: adapt setup_imm_df() to allow inserting instructions before another one
Samuel Iglesias Gonsálvez writes: > Add a new setup_imm_df() that allows the insertion of the instructions > before another one. This will be used in the lowering passes for DF > instructions. > > v2: > - Adapt emission of DIM instruction too. > > Signed-off-by: Samuel Iglesias Gonsálvez > --- > src/mesa/drivers/dri/i965/brw_vec4.h | 2 ++ > src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 26 +- > 2 files changed, 23 insertions(+), 5 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h > b/src/mesa/drivers/dri/i965/brw_vec4.h > index 29b203af89e..01b928ef4a7 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h > @@ -328,6 +328,8 @@ public: >brw_reg_type single_type); > > src_reg setup_imm_df(double v); > + src_reg setup_imm_df(double v, struct bblock_t *block, > +vec4_instruction *inst); > > vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src, > bool for_write, > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > index 2127415be7a..e99db2cef8e 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > @@ -1213,6 +1213,12 @@ vec4_visitor::emit_conversion_to_double(dst_reg dst, > src_reg src, > src_reg > vec4_visitor::setup_imm_df(double v) > { > + return setup_imm_df(v, NULL, NULL); > +} > + > +src_reg > +vec4_visitor::setup_imm_df(double v, struct bblock_t *block, > brw::vec4_instruction *inst) > +{ > assert(devinfo->gen >= 7); > > if (devinfo->gen >= 8) > @@ -1223,7 +1229,10 @@ vec4_visitor::setup_imm_df(double v) > */ > if (devinfo->is_haswell) { >dst_reg dst = retype(dst_reg(VGRF, alloc.allocate(2)), > BRW_REGISTER_TYPE_DF); > - emit(DIM(dst, brw_imm_df(v)))->force_writemask_all = true; > + if (block) > + emit_before(block, inst, DIM(dst, > brw_imm_df(v)))->force_writemask_all = true; > + else > + emit(DIM(dst, brw_imm_df(v)))->force_writemask_all = true; >return swizzle(src_reg(retype(dst, 
BRW_REGISTER_TYPE_DF)), > BRW_SWIZZLE_); > } > > @@ -1248,10 +1257,17 @@ vec4_visitor::setup_imm_df(double v) > const dst_reg tmp = >retype(dst_reg(VGRF, alloc.allocate(2)), BRW_REGISTER_TYPE_UD); > for (int n = 0; n < 2; n++) { > - emit(MOV(writemask(offset(tmp, 8, n), WRITEMASK_X), brw_imm_ud(di.i1))) > - ->force_writemask_all = true; > - emit(MOV(writemask(offset(tmp, 8, n), WRITEMASK_Y), brw_imm_ud(di.i2))) > - ->force_writemask_all = true; > + if (block) { > + emit_before(block, inst, MOV(writemask(offset(tmp, 8, n), > WRITEMASK_X), brw_imm_ud(di.i1))) > +->force_writemask_all = true; > + emit_before(block, inst, MOV(writemask(offset(tmp, 8, n), > WRITEMASK_Y), brw_imm_ud(di.i2))) > +->force_writemask_all = true; > + } else { > + emit(MOV(writemask(offset(tmp, 8, n), WRITEMASK_X), > brw_imm_ud(di.i1))) > +->force_writemask_all = true; > + emit(MOV(writemask(offset(tmp, 8, n), WRITEMASK_Y), > brw_imm_ud(di.i2))) > +->force_writemask_all = true; > + } This would be substantially easier if you used the builder interface, you wouldn't even need to add any additional arguments to setup_imm_df() and the conditional block would be unnecessary. > } > > return swizzle(src_reg(retype(tmp, BRW_REGISTER_TYPE_DF)), > BRW_SWIZZLE_); > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 17/24] i965/vec4: consider subregister offset in live variables
Samuel Iglesias Gonsálvez writes: > From: "Juan A. Suarez Romero" > > Take into account offset values less than a full register (32 bytes) > when getting the var from register. > > This is required when dealing with an operation that writes half of the > register (like one d2x in IVB/BYT, which uses exec_size == 4). > > - v2: take in account this offset < 32 in liveness analysis too (Curro) > --- > src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp | 12 > src/mesa/drivers/dri/i965/brw_vec4_live_variables.h | 6 -- > 2 files changed, 12 insertions(+), 6 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp > index 73f658cd8fa..dc1ad21038c 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp > @@ -78,7 +78,8 @@ vec4_live_variables::setup_def_use() > if (inst->src[i].file == VGRF) { > for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), > 16); j++) { >for (int c = 0; c < 4; c++) { > - const unsigned v = var_from_reg(alloc, inst->src[i], c, > j); > + const unsigned v = > +var_from_reg(alloc, inst->src[i], c, j); Neither this nor the four subsequent hunks seem to be doing anything, please drop them. 
> if (!BITSET_TEST(bd->def, v)) > BITSET_SET(bd->use, v); >} > @@ -101,7 +102,8 @@ vec4_live_variables::setup_def_use() > for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); > i++) { > for (int c = 0; c < 4; c++) { >if (inst->dst.writemask & (1 << c)) { > - const unsigned v = var_from_reg(alloc, inst->dst, c, i); > + const unsigned v = > +var_from_reg(alloc, inst->dst, c, i); > if (!BITSET_TEST(bd->use, v)) > BITSET_SET(bd->def, v); >} > @@ -257,7 +259,8 @@ vec4_visitor::calculate_live_intervals() >if (inst->src[i].file == VGRF) { > for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); > j++) { > for (int c = 0; c < 4; c++) { > - const unsigned v = var_from_reg(alloc, inst->src[i], c, j); > + const unsigned v = > + var_from_reg(alloc, inst->src[i], c, j); >start[v] = MIN2(start[v], ip); >end[v] = ip; > } > @@ -269,7 +272,8 @@ vec4_visitor::calculate_live_intervals() > for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) > { > for (int c = 0; c < 4; c++) { > if (inst->dst.writemask & (1 << c)) { > - const unsigned v = var_from_reg(alloc, inst->dst, c, i); > + const unsigned v = > + var_from_reg(alloc, inst->dst, c, i); >start[v] = MIN2(start[v], ip); >end[v] = ip; > } > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h > b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h > index 8807c453743..b23df650c11 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h > +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h > @@ -89,7 +89,8 @@ var_from_reg(const simple_allocator &alloc, const src_reg > ®, > const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4); > unsigned result = >8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + > - (BRW_GET_SWZ(reg.swizzle, c) + k / csize * 4) * csize + k % csize; > + (BRW_GET_SWZ(reg.swizzle, c) + k / csize * 4) * csize + k % csize + > + (reg.offset % REG_SIZE) / type_sz(reg.type); Looks bogus to me, the result is expressed in dwords not in type_sz units (because 
the live analysis pass has dword granularity). Instead of adding new terms to the expression you could just take the 'reg.offset / REG_SIZE' term out of the first parentheses and replace it with 'reg.offset / 4'. > /* Do not exceed the limit for this register */ > assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr])); > return result; > @@ -103,7 +104,8 @@ var_from_reg(const simple_allocator &alloc, const dst_reg > ®, > const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4); > unsigned result = >8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + > - (c + k / csize * 4) * csize + k % csize; > + (c + k / csize * 4) * csize + k % csize + > + (reg.offset % REG_SIZE) / type_sz(reg.type); Same here. > /* Do not exceed the limit for this register */ > assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr])); > return result; > -- > 2.11.0 > > ___ > mesa-de
Re: [Mesa-dev] [PATCH v3 16/24] i965/vec4: fix SIMD-width lowering for VEC4_OPCODE_FROM_DOUBLE in IVB/BYT
Samuel Iglesias Gonsálvez writes: > From: "Juan A. Suarez Romero" > > When splitting VEC4_OPCODE_FROM_DOUBLE in Ivybridge/Baytrail, the second > part should use a temporal register, and then move the values to the > second half of the original destination, so we get all the results in the > same register. > > v2: > - Fix typos (Matt). > --- > src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 + > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 1 + > 2 files changed, 14 insertions(+), 4 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index 64b435f3ec4..adcde085305 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -2191,9 +2191,15 @@ vec4_visitor::lower_simd_width() > linst->group = channel_offset; > linst->size_written = size_written; > > + /* When splitting VEC4_OPCODE_FROM_DOUBLE on Ivybridge, the second > part > + * should use in a temporal register. Later we will move the values > + * to the second half of the original destination, so we get all the > + * results in the same register. We use d2f_pass to detect this > case. > + */ > + bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE && n > 0); > /* Compute split dst region */ > dst_reg dst; > - if (needs_temp) { > + if (needs_temp || d2f_pass) { > unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE); > dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), > inst->dst.type); > @@ -2226,9 +2232,12 @@ vec4_visitor::lower_simd_width() > /* If we used a temporary to store the result of the split >* instruction, copy the result to the original destination >*/ > - if (needs_temp) { > -vec4_instruction *mov = > - MOV(offset(inst->dst, lowered_width, n), src_reg(dst)); > + if (needs_temp || d2f_pass) { > +vec4_instruction *mov; > +if (d2f_pass) > + mov = MOV(horiz_offset(inst->dst, n * > type_sz(inst->dst.type)), src_reg(dst)); I have no idea how this could possibly work... 
horiz_offset() expects a number of scalar components, not bytes. Anyway I have a hunch this is trying to workaround the bug I pointed out in PATCH 15... > +else > + mov = MOV(offset(inst->dst, lowered_width, n), src_reg(dst)); > mov->exec_size = lowered_width; > mov->group = channel_offset; > mov->size_written = size_written; > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index 7fa1afc9073..b570792badd 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -1532,6 +1532,7 @@ generate_code(struct brw_codegen *p, > is_ivb_df); > >assert(inst->group % 8 == 0 || > + (inst->exec_size == 4 && inst->group % 4 == 0) || > inst->dst.type == BRW_REGISTER_TYPE_DF || > inst->src[0].type == BRW_REGISTER_TYPE_DF || > inst->src[1].type == BRW_REGISTER_TYPE_DF || > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 15/24] i965/vec4: fix VEC4_OPCODE_FROM_DOUBLE for IVB/BYT
Samuel Iglesias Gonsálvez writes: > From: "Juan A. Suarez Romero" > > In the generator we must generate slightly different code for > Ivybridge/Baytrail, because of the way the stride works in > this hardware. > --- > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 17 - > 1 file changed, 16 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index 7bb1ab1879c..7fa1afc9073 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -1948,13 +1948,28 @@ generate_code(struct brw_codegen *p, > > brw_set_default_access_mode(p, BRW_ALIGN_1); > > - dst.hstride = BRW_HORIZONTAL_STRIDE_2; How would this know whether there was enough space allocated in the destination to hold the destination value with a stride of two? What if the following SIMD vector in the same GRF allocation actually contained useful single-precision data that is getting corrupted by the oversized strided destination? I think we should be doing this pre-regalloc so we can allocate a temporary large enough to hold the strided value... > + /* When converting from DF->F, we set destination's stride as 2 as > an > + * aligment requirement. But in IVB/BYT, each DF implicitly writes > + * two floats, being the first one the converted value. So we don't > + * need to explicitly set stride 2, but 1. > + */ > + if (devinfo->gen == 7 && !devinfo->is_haswell) > +dst.hstride = BRW_HORIZONTAL_STRIDE_1; > + else > +dst.hstride = BRW_HORIZONTAL_STRIDE_2; > + > dst.width = BRW_WIDTH_4; > src[0].vstride = BRW_VERTICAL_STRIDE_4; > src[0].width = BRW_WIDTH_4; > brw_MOV(p, dst, src[0]); > spread(dst, desired-stride) so you don't mess up the original destination brw_reg and have to fix it up again later. > struct brw_reg dst_as_src = dst; > + /* As we have set horizontal stride 1 instead of 2 in IVB/BYT, we > + * need to fix it here to have the expected value. 
> + */ > + if (devinfo->gen == 7 && !devinfo->is_haswell) > +dst_as_src.hstride = BRW_HORIZONTAL_STRIDE_2; > + > dst.hstride = BRW_HORIZONTAL_STRIDE_1; > dst.width = BRW_WIDTH_8; > brw_MOV(p, dst, dst_as_src); > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 14/24] i965/vec4: keep original type when dealing with null registers
Samuel Iglesias Gonsálvez writes: > From: "Juan A. Suarez Romero" > > Keep the original type when dealing with null registers. Specially s/specially/especially/ here and below. > because we do no want to introduce an implicit conversion between > types that could affect the conditional flags. > > This affects specially when the original type is DF, and we are working > on Ivybridge/Baytrail. > --- > src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 > 1 file changed, 4 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index 7080c93e550..64b435f3ec4 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -1983,6 +1983,7 @@ vec4_visitor::convert_to_hw_regs() > case BAD_FILE: > /* Probably unused. */ > reg = brw_null_reg(); > +reg.type = src.type; You could use retype() here and below. > break; > > case MRF: > @@ -2033,6 +2034,7 @@ vec4_visitor::convert_to_hw_regs() > >case BAD_FILE: > reg = brw_null_reg(); > + reg.type = dst.type; > break; > >case IMM: > @@ -2205,6 +2207,8 @@ vec4_visitor::lower_simd_width() > } else { > if (inst->dst.file != ARF) > dst = horiz_offset(inst->dst, channel_offset); > +else > + dst.type = inst->dst.type; This shouldn't be necessary if you take into account my feedback to PATCH 13. With that fixed: Reviewed-by: Francisco Jerez > } > linst->dst = dst; > > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 13/24] i965/vec4: split DF instructions and later double its execsize in IVB/BYT
Samuel Iglesias Gonsálvez writes: > We need to split DF instructions in two on IVB/BYT as it needs an > execsize 8 to process 4 DF values (one GRF in total). > > v2: > - Rename helper and make it static inline function (Matt). > - Fix indention and add braces (Matt). > > Signed-off-by: Samuel Iglesias Gonsálvez > --- > src/mesa/drivers/dri/i965/brw_ir_vec4.h | 14 ++ > src/mesa/drivers/dri/i965/brw_vec4.cpp | 7 ++- > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 +-- > 3 files changed, 33 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h > b/src/mesa/drivers/dri/i965/brw_ir_vec4.h > index 57fc6be8f89..9d29c3fb944 100644 > --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h > @@ -405,6 +405,20 @@ regs_read(const vec4_instruction *inst, unsigned i) > reg_size); > } > > +static inline unsigned > +get_exec_type_size(const vec4_instruction *inst) > +{ > + unsigned exec_type_size = 0; > + > + for (int i = 0; i < 3; i++) { > + if (inst->src[i].type != BAD_FILE) { > + exec_type_size = MAX2(exec_type_size, type_sz(inst->src[i].type)); > + } > + } > + > + return exec_type_size; > +} > + > } /* namespace brw */ > > #endif > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index 5e60eb657a7..7080c93e550 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -2092,6 +2092,10 @@ get_lowered_simd_width(const struct gen_device_info > *devinfo, >if (inst->opcode == BRW_OPCODE_SEL && type_sz(inst->dst.type) == 8) > lowered_width = MIN2(lowered_width, 4); > Maybe add a short comment here explaining why you need to do this? 
> + if (devinfo->gen == 7 && !devinfo->is_haswell && > + (get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8)) > + lowered_width = MIN2(lowered_width, 4); > + >/* HSW PRM, 3D Media GPGPU Engine, Region Alignment Rules for Direct > * Register Addressing: > * > @@ -2199,7 +2203,8 @@ vec4_visitor::lower_simd_width() > inst->insert_before(block, copy); > } > } else { > -dst = horiz_offset(inst->dst, channel_offset); > +if (inst->dst.file != ARF) > + dst = horiz_offset(inst->dst, channel_offset); This doesn't look right, you need to give the same treatment to ARF registers as to other registers. If what you're trying to avoid here is shifting the null register incorrectly, I suggest you fix horiz_offset() to return the argument unchanged if it's the null register, because the null register logically behaves like a scalar register (this is also consistent with the way the FS back-end handles the same situation). > } > linst->dst = dst; > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index 847a01bd43c..7bb1ab1879c 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -1522,14 +1522,25 @@ generate_code(struct brw_codegen *p, >brw_set_default_saturate(p, inst->saturate); >brw_set_default_mask_control(p, inst->force_writemask_all); >brw_set_default_acc_write_control(p, inst->writes_accumulator); > - brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); > > - assert(inst->group % inst->exec_size == 0); > + bool is_ivb_df = devinfo->gen == 7 && > + !devinfo->is_haswell && > + (get_exec_type_size(inst) == 8 || > + inst->dst.type == BRW_REGISTER_TYPE_DF); > + > + assert(inst->group % inst->exec_size == 0 || > + is_ivb_df); > + >assert(inst->group % 8 == 0 || > inst->dst.type == BRW_REGISTER_TYPE_DF || > inst->src[0].type == BRW_REGISTER_TYPE_DF || > inst->src[1].type == BRW_REGISTER_TYPE_DF || > inst->src[2].type == 
BRW_REGISTER_TYPE_DF); > + > + if (is_ivb_df && inst->exec_size < 8) > + inst->exec_size *= 2; > + brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); > + Same comment here as for its FS counterpart... Please let's not modify the IR from the generator. >if (!inst->force_writemask_all) > brw_set_default_group(p, inst->group); > > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH V2] util/disk_cache: support caches for multiple architectures
On 03/03/17 23:27, Grazvydas Ignotas wrote: On Fri, Mar 3, 2017 at 5:27 AM, Timothy Arceri wrote: Previously we were deleting the entire cache if a user switched between 32 and 64 bit applications. V2: make the check more generic, it should now work with any platform we are likely to support. --- src/util/disk_cache.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c index 3abdec4..92528a9 100644 --- a/src/util/disk_cache.c +++ b/src/util/disk_cache.c @@ -40,20 +40,30 @@ #include "zlib.h" #include "util/crc32.h" #include "util/u_atomic.h" #include "util/mesa-sha1.h" #include "util/ralloc.h" #include "main/errors.h" #include "disk_cache.h" +#if defined(__ILP32__) +#if defined(__x86_64__) || defined(__arm__) +#define CACHE_ARCH "ilp-32" +#else +#define CACHE_ARCH "32" +#endif +#else +#define CACHE_ARCH "64" +#endif That reports "64" for me on gcc -m32, I think only clang sets __ILP32__ for non-x32 32bit build. Well that's annoying. I'd still suggest using sizeof(void *) directly in the code, perhaps within some "const char *get_arch_bitness_string()" helper, that should be more reliable. I'm tempted to just push this: https://patchwork.freedesktop.org/patch/141891/ And worry about issues later if that's not good enough. Gražvydas ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] util/disk_cache: support caches for multiple architectures
On 04/03/17 04:05, Alan Swanson wrote: On Fri, 2017-03-03 at 12:24 +1100, Timothy Arceri wrote: On 03/03/17 11:53, Marek Olšák wrote: OK. I also wonder if 1GB isn't too conservative. Today’s games take up a lot of space. My installed games occupy 480 GB. I could certainly spare 10 GB for a shader cache if it improves gaming experience. For example, my ccache size is set to 27 GB, because 1 or 5 or 10 GB wasn't enough for my use case. I assume some gamers would have a similar attitude. Yeah I agree that 1GB is probably too small. This was set by Carl before we even knew how much data we needed to cache. I'm happy to set it at 4GB which would be a possible 8GB total. We may need to cap it at 4GB for some platforms anyway, or at least figure out a work around for this: https://bugs.freedesktop.org/show_bug.cgi?id=93089 I wouldn't say that 1G was too small currently as, for example, the cache for shader heavy DeusEx:MD is ~50M compressed per your commit message. There is the mythical quote of 640K being enough but how many games and applications do you need cached at once? Well it's not like we reserve the space, we would just not be imposing a small limit. The Dolphin emu is an example of an app that apparently creates a very large amount of shaders. A more relevant issue would then be the random eviction rather than using LRU eviction. Happy to accept patches. The random evict code was written before my time on this. However perhaps we could dynamically scale by checking statvfs and quotactl to choose MAX[1G, MIN[10% user home filesystem, 10% user home quota]]? Again patches welcome :) -- Alan. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 07/24] i965/fs: generalize the legalization d2x pass
Samuel Iglesias Gonsálvez writes: > Add support to SEL instruction and add an assert to detect unsupported > instructions than do d2x conversions. > > Signed-off-by: Samuel Iglesias Gonsálvez > --- > > Curro, this patch legalizes SEL instruction too. If other optimizations > modify later any SEL's (or any other instruction's) destination type > (hence, producing a non-lowered d2x conversion), we can call it again > around the end of fs_visitor::optimize(). Possibly together with > lower_simd_width() just in case it was added later. > This sounds rather scary... How do you make sure that this doesn't lead to an infinite legalization-optimization loop in which copy propagation reverses the effect of lower_d2x making double conversions illegal again? If you do already, why do you need to run lower_d2x multiple times? Wouldn't it be sufficient to run it once near the end of optimize(), and then re-run copy propagation and possibly DCE? > For that reason there is the inst->dst.stride > 1 condition in the > test. This detects if either we emitted a strided destination in > purpose or it was as a result of a previous lower_d2x run, we don't > want to lowered it. > The problem with this is that if you ended up with dst.stride > 1 due to different fields of the same scalar quantity being defined by two separate instructions (e.g. by using subscript(dst, ..., i)), you *need* to apply the lowering pass regardless, because otherwise the second instruction will corrupt the data written by the first instruction. > However, as I have not hit that case yet, I prefer to wait for your > opinion. What do you think? 
> > > src/mesa/drivers/dri/i965/brw_fs_lower_d2x.cpp | 57 > ++ > 1 file changed, 41 insertions(+), 16 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_lower_d2x.cpp > b/src/mesa/drivers/dri/i965/brw_fs_lower_d2x.cpp > index a2db1154615..330f2552929 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_lower_d2x.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_lower_d2x.cpp > @@ -33,17 +33,9 @@ fs_visitor::lower_d2x() > bool progress = false; > > foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { > - if (inst->opcode != BRW_OPCODE_MOV) > - continue; > - > - if (inst->dst.type != BRW_REGISTER_TYPE_F && > - inst->dst.type != BRW_REGISTER_TYPE_D && > - inst->dst.type != BRW_REGISTER_TYPE_UD) > - continue; > - > - if (inst->src[0].type != BRW_REGISTER_TYPE_DF && > - inst->src[0].type != BRW_REGISTER_TYPE_UQ && > - inst->src[0].type != BRW_REGISTER_TYPE_Q) > + if (get_exec_type_size(inst) != 8 || > + type_sz(inst->dst.type) >= get_exec_type_size(inst) || Note that some type conversion restrictions apply even if the execution type is single-precision, and even if the destination type size is not less than the execution type, e.g. according to the hardware docs SEL doesn't support F->UD or F->DF conversions which the condition above would consider okay. > + inst->dst.stride > 1) > continue; > >assert(inst->dst.file == VGRF); > @@ -61,13 +53,46 @@ fs_visitor::lower_d2x() > * So we need to allocate a temporary that's two registers, and then do > * a strided MOV to get the lower DWord of every Qword that has the > * result. > + * > + * This pass legalizes all the DF conversions to narrower types. 
> */ > - fs_reg temp = ibld.vgrf(inst->src[0].type, 1); > - fs_reg strided_temp = subscript(temp, inst->dst.type, 0); > - ibld.MOV(strided_temp, inst->src[0]); > - ibld.MOV(dst, strided_temp); > + switch (inst->opcode) { I suggest you refactor this into a helper function 'bool supports_type_conversion(inst, dst_type, exec_type)' that returns false for SEL and likely other things. It might be a useful thing to have in other places, e.g. for late optimization passes like copy propagation where we need to make sure that no additional illegal conversions are introduced. If the value returned is false you'd do what you have below for the SEL instruction, if it's true you'd do nothing unless the instruction is double-precision and the destination type is smaller than the execution type, in which case you'd do what you have below for MOV/MOV_INDIRECT. > + case SHADER_OPCODE_MOV_INDIRECT: > + case BRW_OPCODE_MOV: { > + fs_reg temp = ibld.vgrf(inst->src[0].type, 1); > + fs_reg strided_temp = subscript(temp, inst->dst.type, 0); > + /* We clone the original instruction as we are going to modify it > + * and emit another one after it. > + */ > + fs_inst *strided_inst = new(ibld.shader->mem_ctx) fs_inst(*inst); Why don't you just modify the original instruction instead of cloning it, modifying the clone, and then removing the original? > + strided_inst->dst = strided_temp; > + /* As it is an strided destination, we write n-times mo
Re: [Mesa-dev] [PATCH RFC] clover: clone pipe-resource if root-buffer already exists
Jan Vesely writes: > Fixes cl-api-enqueue-read_write-buffer on carrizo+topaz machine > > Signed-off-by: Jan Vesely > --- > I'm not sure if cloning is the right thing to do. > Should we copy the pointer and increase ref count instead? > > PS: the assert did not trigger despite building mesa with --enable-debug > > Jan > > src/gallium/state_trackers/clover/core/resource.cpp | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/state_trackers/clover/core/resource.cpp > b/src/gallium/state_trackers/clover/core/resource.cpp > index 06fd3f6..71414f4 100644 > --- a/src/gallium/state_trackers/clover/core/resource.cpp > +++ b/src/gallium/state_trackers/clover/core/resource.cpp > @@ -172,7 +172,7 @@ root_resource::root_resource(clover::device &dev, > memory_obj &obj, > root_resource::root_resource(clover::device &dev, memory_obj &obj, > root_resource &r) : > resource(dev, obj) { > - assert(0); // XXX -- resource shared among dev and r.dev > + pipe = dev.pipe->resource_create(dev.pipe, r.pipe); AFAIK this doesn't actually create a resource shared among the two devices, which is what you need here. > } > > root_resource::~root_resource() { > -- > 2.9.3 signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Rename brw_format_for_mesa_format() to brw_isl_format_for_mesa_format()
Signed-off-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_blorp.c| 2 +- src/mesa/drivers/dri/i965/brw_context.c | 2 +- src/mesa/drivers/dri/i965/brw_meta_util.c| 2 +- src/mesa/drivers/dri/i965/brw_state.h| 2 +- src/mesa/drivers/dri/i965/brw_surface_formats.c | 14 +++--- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 4 ++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 9f7ba3d..fdc9dd1 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -294,7 +294,7 @@ brw_blorp_to_isl_format(struct brw_context *brw, mesa_format format, assert(brw->format_supported_as_render_target[format]); return brw->render_target_format[format]; } else { - return brw_format_for_mesa_format(format); + return brw_isl_format_for_mesa_format(format); } break; } diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 3688ba4..42dfed0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -205,7 +205,7 @@ intel_texture_view_requires_resolve(struct brw_context *brw, !intel_miptree_is_lossless_compressed(brw, intel_tex->mt)) return false; - const uint32_t brw_format = brw_format_for_mesa_format(intel_tex->_Format); + const uint32_t brw_format = brw_isl_format_for_mesa_format(intel_tex->_Format); if (isl_format_supports_ccs_e(&brw->screen->devinfo, brw_format)) return false; diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.c b/src/mesa/drivers/dri/i965/brw_meta_util.c index 07a160f..cbc2ded 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_util.c +++ b/src/mesa/drivers/dri/i965/brw_meta_util.c @@ -288,7 +288,7 @@ brw_is_color_fast_clear_compatible(struct brw_context *brw, * this case. At least on Gen9 this really does seem to cause problems. 
*/ if (brw->gen >= 9 && - brw_format_for_mesa_format(mt->format) != + brw_isl_format_for_mesa_format(mt->format) != brw->render_target_format[mt->format]) return false; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 4b7e3c2..bd05b60 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -267,7 +267,7 @@ void gen4_init_vtable_surface_functions(struct brw_context *brw); uint32_t brw_get_surface_tiling_bits(uint32_t tiling); uint32_t brw_get_surface_num_multisamples(unsigned num_samples); -uint32_t brw_format_for_mesa_format(mesa_format mesa_format); +uint32_t brw_isl_format_for_mesa_format(mesa_format mesa_format); GLuint translate_tex_target(GLenum target); diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 706818d..7b17e11 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -29,7 +29,7 @@ #include "brw_defines.h" uint32_t -brw_format_for_mesa_format(mesa_format mesa_format) +brw_isl_format_for_mesa_format(mesa_format mesa_format) { /* This table is ordered according to the enum ordering in formats.h. We do * expect that enum to be extended without our explicit initialization @@ -303,7 +303,7 @@ brw_init_surface_formats(struct brw_context *brw) uint32_t texture, render; bool is_integer = _mesa_is_format_integer_color(format); - render = texture = brw_format_for_mesa_format(format); + render = texture = brw_isl_format_for_mesa_format(format); /* The value of ISL_FORMAT_R32G32B32A32_FLOAT is 0, so don't skip * it. @@ -536,7 +536,7 @@ translate_tex_format(struct brw_context *brw, return ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS; case MESA_FORMAT_RGBA_FLOAT32: - /* The value of this BRW_SURFACEFORMAT is 0, which tricks the + /* The value of this ISL surface format is 0, which tricks the * assertion below. 
*/ return ISL_FORMAT_R32G32B32A32_FLOAT; @@ -550,7 +550,7 @@ translate_tex_format(struct brw_context *brw, WARN_ONCE(true, "Demoting sRGB DXT1 texture to non-sRGB\n"); mesa_format = MESA_FORMAT_RGB_DXT1; } - return brw_format_for_mesa_format(mesa_format); + return brw_isl_format_for_mesa_format(mesa_format); case MESA_FORMAT_RGBA_ASTC_4x4: case MESA_FORMAT_RGBA_ASTC_5x4: @@ -566,7 +566,7 @@ translate_tex_format(struct brw_context *brw, case MESA_FORMAT_RGBA_ASTC_10x10: case MESA_FORMAT_RGBA_ASTC_12x10: case MESA_FORMAT_RGBA_ASTC_12x12: { - GLuint brw_fmt = brw_format_for_mesa_format(mesa_format); + GLuint brw_fmt = brw_isl_format_for_mesa_format(mesa_format); /** * It is possible to process thes
Re: [Mesa-dev] [PATCH] nir/builder: Add an int46 immediate helper
with s/int46/int64/, Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir/int64: Properly handle imod/irem
Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Clamp texture buffer size to GL_MAX_TEXTURE_BUFFER_SIZE.
On Fri, Mar 3, 2017 at 11:32 AM, Kenneth Graunke wrote: > The OpenGL 4.5 specification's description of TexBuffer says: > > "The number of texels in the texture image is then clamped to an > implementation-dependent limit, the value of MAX_TEXTURE_BUFFER_SIZE." > > We set GL_MAX_TEXTURE_BUFFER_SIZE to 2^27. For buffers with a byte > element size, this is the maximum possible size we can encode in > SURFACE_STATE. If you bind a buffer object larger than this as a > texture buffer object, we'll exceed that limit and hit an isl assert: > >assert(num_elements <= (1ull << 27)); > > To fix this, clamp the size in bytes to MaxTextureSize / texel_size. > > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 18 ++ > 1 file changed, 18 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index 8ffbc0a5fd5..e48b1e1d2d1 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -695,6 +695,24 @@ brw_update_buffer_texture_surface(struct gl_context *ctx, >bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size); > } > > + /* The ARB_texture_buffer_specification says: > +* > +*"The number of texels in the buffer texture's texel array is given > by > +* > +* floor(<buffer_size> / (<components> * sizeof(<base_type>)), > +* > +* where <buffer_size> is the size of the buffer object, in basic > +* machine units and <components> and <base_type> are the element > count > +* and base data type for elements, as specified in Table X.1. The > +* number of texels in the texel array is then clamped to the > +* implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB." > +* > +* We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride, > +* so that when ISL divides by stride to obtain the number of texels, that > +* texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
> +*/ > + size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) > texel_size); > + > if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) { >_mesa_problem(NULL, "bad format %s for texture buffer\n", > _mesa_get_format_name(format)); > -- > 2.11.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev LGTM. Reviewed-by: Anuj Phogat ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 0/5] swr: geometry shaders
Maybe a little cleaner wording for the relnotes: “Geometry shaders on on swr”; perhaps “Geometry shaders enabled on swr”? With that… Reviewed-by: Bruce Cherniak > On Mar 3, 2017, at 2:24 PM, Tim Rowley wrote: > > This patch set implements geometry shaders for the swr driver. > > Probably still some problems (some hidden by transform feedback bugs), > but an additional 1719 piglit tests pass, and the VTK tests using > geometry shaders now pass. > > v2: > * swr: remove SWR_NEW_ALL, make new vs dirty gs state > * configure.ac: modify commit message to mention scons/automake match > * relnotes: mention swr gs addition, updated llvm requirements > > Tim Rowley (5): > configure.ac: increase required swr llvm to 3.9.0 > swr: implement geometry shaders > swr: [rasterizer core] fix primID provoking vertex for GS > docs: update features.txt for swr geometry shaders > relnotes: [swr] note addition of gs, increased llvm requirement > > configure.ac | 2 +- > docs/features.txt | 18 +- > docs/relnotes/17.1.0.html | 4 + > .../drivers/swr/rasterizer/core/frontend.cpp | 4 +- > src/gallium/drivers/swr/swr_context.cpp| 2 +- > src/gallium/drivers/swr/swr_context.h | 26 +- > src/gallium/drivers/swr/swr_draw.cpp | 50 +-- > src/gallium/drivers/swr/swr_fence_work.cpp | 21 + > src/gallium/drivers/swr/swr_fence_work.h | 3 + > src/gallium/drivers/swr/swr_scratch.cpp| 1 + > src/gallium/drivers/swr/swr_scratch.h | 1 + > src/gallium/drivers/swr/swr_screen.cpp | 6 +- > src/gallium/drivers/swr/swr_shader.cpp | 475 - > src/gallium/drivers/swr/swr_shader.h | 22 + > src/gallium/drivers/swr/swr_state.cpp | 100 - > src/gallium/drivers/swr/swr_state.h| 50 +++ > src/gallium/drivers/swr/swr_tex_sample.cpp | 6 + > 17 files changed, 716 insertions(+), 75 deletions(-) > > -- > 2.7.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler
On Fri, Mar 3, 2017 at 11:41 AM, Emil Velikov wrote: > On 3 March 2017 at 18:42, Mauro Rossi wrote: > >> ok .. now I got some fixes for this here: > >> > >> https://github.com/tpalli/external-mesa/commits/move_compiler > >> > >> but yeah .. more work required :/ I don't understand why adding > >> MESA_GEN_GLSL_H to LOCAL_GENERATED_SOURCES does not help. > > > > Hi Emil, Tapani, > > > > The problem is just that the LOCAL_C_INCLUDES for new library are > > missing one line that was present in > > src/mesa/drivers/dri/i965/Android.mk, the following one: > > > >$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl, > ,)/glsl > > > Yeah, noticed a similar one missing on the automake/autoconf side. > > Jason, how would you like the fixes to squash - separate patches, > branch with separate "squash !..." or a branch with them squashed > already ? > I don't care ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/4] configure.ac: increase required swr llvm to 3.9.0
> On Mar 3, 2017, at 5:55 AM, Emil Velikov wrote: > > On 3 March 2017 at 01:16, Tim Rowley wrote: >> GS implementation uses the masked.{gather,store} intrinsics, >> introduced in llvm-3.9.0. > > Please mention in the commit message that the SCons build already > requires 3.9 or later. > Can you add a note about the LLVM requirement and GS support in > docs/relnotes/17.1.0.html, with a separate commit on top ? Both of these are in v2 of the patch set. > With this we have some ~20 preprocessor conditionals which want to be > cleaned up. Look for > $ git grep "LLVM_.*VERSION\|HAVE_LLVM" -- src/gallium/drivers/swr/ Ah, good catch. We’ve been ratcheting up our required llvm version without cleaning out some of the cruft. Internally we’re still using 3.8 so not all of these can be removed. I’ll work on that in a follow-up patch, as it’s unrelated to the geometry shader implementation. -Tim ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64
On Fri, Mar 3, 2017 at 11:41 AM, Ilia Mirkin wrote: > On Fri, Mar 3, 2017 at 2:16 PM, Jason Ekstrand > wrote: > > Hey Elie! > > > > On Fri, Mar 3, 2017 at 8:22 AM, Elie Tournier > > wrote: > >> > >> From: Elie Tournier > >> > >> This series is based on Ian's work about GL_ARB_gpu_shader_int64 [1]. > >> The goal is to expose GL_ARB_shader_fp64 to OpenGL 3.0 GPUs. > >> > >> Each function can be independently tested using shader_runner from > piglit. > >> The piglit files are stored on github [2]. > >> > >> [1] > >> https://lists.freedesktop.org/archives/mesa-dev/2016- > November/136718.html > >> [2] https://github.com/Hopetech/libSoftFloat > > > > > > Glad to see this finally turning into code. > > > > Before, we get too far into things, I'd like to talk about the approach a > > bit. First off, if we (Intel) are going to use this on any hardware, we > > would really like it to be in NIR. The reason for this is that NIR has a > > much more powerful algebraic optimizer than GLSL IR and we would like to > > have as few fp64 instructions as possible before we start lowering them > to > > piles of integer math. I believe Ian's plan for this was that someone > would > > write a nir_builder back-end for the stand-alone compiler. > Unfortunately, > > he sort-of left that as "an exercise to the reader" and no code exists > to my > > knowledge. If we're going to write things in GLSL, we really need that > NIR > > back-end. > > I'm not sure what the impetus was for developing a softfloat library > (but I'm a big fan). but the current situation is that it will largely > just be useful for AMD Evergreen/Northern Islands chips, which consume > TGSI produced from GLSL. (Aside: [1].) As such, I'm not sure if a push > towards NIR is warranted -- it would cause a more convoluted path > towards the intended target. > Whether or not i965 wants softfloat is an ongoing debate. On the one hand, we have "hardware support" for it starting with ivy bridge. 
On the other hand, early hardware support is sufficiently terrible that softfloat may end up being a better plan. Also, I wouldn't be surprised if, at some point in the future, some hardware engineer decides they can save a bunch of power on low-power parts if they delete the fp64 hardware. Since we ship desktop GL on those parts, losing 4.0 would be bad. I don't want to paint ourselves into a corner on fp64. > I do agree with the larger point - the lowering should be done as late > as possible in order to enable algebraic-style optimizations. (This is > also why I've argued that optimizing in the frontend is too early - it > should all just be done in the backend, as additional calculations > can easily make their way into the flow. I realize that's impractical > for i965 though as the backend is not SSA though, and some opts are > necessary in GLSL in order to perform the necessary validation.) > That's not really an accurate account of why we do it in NIR for i965... By the time we get done with all the lowering we do in NIR, the NIR code looks a lot like back-end code. Certainly, any optimizations on fp64 operations will already have been done. It's just that anything that looks too much like i965 hardware will be a pain to optimize. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 4/5] docs: update features.txt for swr geometry shaders
--- docs/features.txt | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 67d177d..df1860d 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -78,18 +78,18 @@ GL 3.1, GLSL 1.40 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llv GL_EXT_texture_snorm (Signed normalized textures) DONE () -GL 3.2, GLSL 1.50 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe +GL 3.2, GLSL 1.50 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr Core/compatibility profiles DONE Geometry shaders DONE () - GL_ARB_vertex_array_bgra (BGRA vertex order) DONE (freedreno, swr) - GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE (freedreno, swr) - GL_ARB_fragment_coord_conventions (Frag shader coord) DONE (freedreno, swr) - GL_ARB_provoking_vertex (Provoking vertex)DONE (freedreno, swr) - GL_ARB_seamless_cube_map (Seamless cubemaps) DONE (freedreno, swr) - GL_ARB_texture_multisample (Multisample textures) DONE (swr) - GL_ARB_depth_clamp (Frag depth clamp) DONE (freedreno, swr) - GL_ARB_sync (Fence objects) DONE (freedreno, swr) + GL_ARB_vertex_array_bgra (BGRA vertex order) DONE (freedreno) + GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE (freedreno) + GL_ARB_fragment_coord_conventions (Frag shader coord) DONE (freedreno) + GL_ARB_provoking_vertex (Provoking vertex)DONE (freedreno) + GL_ARB_seamless_cube_map (Seamless cubemaps) DONE (freedreno) + GL_ARB_texture_multisample (Multisample textures) DONE () + GL_ARB_depth_clamp (Frag depth clamp) DONE (freedreno) + GL_ARB_sync (Fence objects) DONE (freedreno) GLX_ARB_create_context_profileDONE -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 5/5] relnotes: [swr] note addition of gs, increased llvm requirement
--- docs/relnotes/17.1.0.html | 4 1 file changed, 4 insertions(+) diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html index 9ababcc..3678173 100644 --- a/docs/relnotes/17.1.0.html +++ b/docs/relnotes/17.1.0.html @@ -47,6 +47,7 @@ Note: some of the new features are only available with certain drivers. GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe GL_ARB_transform_feedback2 on i965/gen6 GL_ARB_transform_feedback_overflow_query on i965/gen6+ +Geometry shaders on on swr Bug fixes @@ -56,8 +57,11 @@ Note: some of the new features are only available with certain drivers. Changes + Removed the ilo gallium driver. The configure option --enable-gallium-llvm is superseded by --enable-llvm. +swr driver now requires llvm >= 3.9.0. + -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/5] configure.ac: increase required swr llvm to 3.9.0
GS implementation uses the masked.{gather,store} intrinsics, introduced in llvm-3.9.0. swr llvm version requirement in automake and scons now match (scons already needed >= 3.9). --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 943bc05..73bd749 100644 --- a/configure.ac +++ b/configure.ac @@ -107,7 +107,7 @@ LLVM_REQUIRED_OPENCL=3.6.0 LLVM_REQUIRED_R600=3.6.0 LLVM_REQUIRED_RADEONSI=3.6.0 LLVM_REQUIRED_RADV=3.9.0 -LLVM_REQUIRED_SWR=3.6.0 +LLVM_REQUIRED_SWR=3.9.0 dnl Check for progs AC_PROG_CPP -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 3/5] swr: [rasterizer core] fix primID provoking vertex for GS
--- src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index c150c51..18728e5 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -815,7 +815,7 @@ static void GeometryShaderStage( { simdvector primIdAttrib[3]; gsPa.Assemble(VERTEX_PRIMID_SLOT, primIdAttrib); -vPrimId = _simd_castps_si(primIdAttrib[0].x); +vPrimId = _simd_castps_si(primIdAttrib[state.frontendState.topologyProvokingVertex].x); } else { @@ -1682,4 +1682,4 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc( bool HasRasterization) { return TemplateArgUnroller::GetFunc(IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization); -} \ No newline at end of file +} -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/5] swr: implement geometry shaders
--- src/gallium/drivers/swr/swr_context.cpp| 2 +- src/gallium/drivers/swr/swr_context.h | 26 +- src/gallium/drivers/swr/swr_draw.cpp | 50 +-- src/gallium/drivers/swr/swr_fence_work.cpp | 21 ++ src/gallium/drivers/swr/swr_fence_work.h | 3 + src/gallium/drivers/swr/swr_scratch.cpp| 1 + src/gallium/drivers/swr/swr_scratch.h | 1 + src/gallium/drivers/swr/swr_screen.cpp | 6 +- src/gallium/drivers/swr/swr_shader.cpp | 475 - src/gallium/drivers/swr/swr_shader.h | 22 ++ src/gallium/drivers/swr/swr_state.cpp | 100 +- src/gallium/drivers/swr/swr_state.h| 50 +++ src/gallium/drivers/swr/swr_tex_sample.cpp | 6 + 13 files changed, 700 insertions(+), 63 deletions(-) diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index b89ce1b..1c98ac2 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -307,7 +307,7 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer); util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems); util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs); - /*util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);*/ + util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs); util_blitter_save_so_targets( ctx->blitter, ctx->num_so_targets, diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index 04e11fe..46ca611 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -40,16 +40,17 @@ #define SWR_NEW_SAMPLER_VIEW (1 << 4) #define SWR_NEW_VS (1 << 5) #define SWR_NEW_FS (1 << 6) -#define SWR_NEW_VSCONSTANTS (1 << 7) -#define SWR_NEW_FSCONSTANTS (1 << 8) -#define SWR_NEW_VERTEX (1 << 9) -#define SWR_NEW_STIPPLE (1 << 10) -#define SWR_NEW_SCISSOR (1 << 11) -#define SWR_NEW_VIEWPORT (1 << 12) -#define SWR_NEW_FRAMEBUFFER (1 << 13) -#define SWR_NEW_CLIP (1 << 14) -#define 
SWR_NEW_SO (1 << 15) -#define SWR_NEW_ALL 0x +#define SWR_NEW_GS (1 << 7) +#define SWR_NEW_VSCONSTANTS (1 << 8) +#define SWR_NEW_FSCONSTANTS (1 << 9) +#define SWR_NEW_GSCONSTANTS (1 << 10) +#define SWR_NEW_VERTEX (1 << 11) +#define SWR_NEW_STIPPLE (1 << 12) +#define SWR_NEW_SCISSOR (1 << 13) +#define SWR_NEW_VIEWPORT (1 << 14) +#define SWR_NEW_FRAMEBUFFER (1 << 15) +#define SWR_NEW_CLIP (1 << 16) +#define SWR_NEW_SO (1 << 17) namespace std { @@ -85,11 +86,15 @@ struct swr_draw_context { uint32_t num_constantsVS[PIPE_MAX_CONSTANT_BUFFERS]; const float *constantFS[PIPE_MAX_CONSTANT_BUFFERS]; uint32_t num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS]; + const float *constantGS[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t num_constantsGS[PIPE_MAX_CONSTANT_BUFFERS]; swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS]; swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS]; + swr_jit_texture texturesGS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + swr_jit_sampler samplersGS[PIPE_MAX_SAMPLERS]; float userClipPlanes[PIPE_MAX_CLIP_PLANES][4]; @@ -112,6 +117,7 @@ struct swr_context { struct swr_vertex_shader *vs; struct swr_fragment_shader *fs; + struct swr_geometry_shader *gs; struct swr_vertex_element_state *velems; /** Other rendering state */ diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp index f764efe..c43f4a5 100644 --- a/src/gallium/drivers/swr/swr_draw.cpp +++ b/src/gallium/drivers/swr/swr_draw.cpp @@ -32,48 +32,6 @@ #include "util/u_prim.h" /* - * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY - */ -static INLINE enum PRIMITIVE_TOPOLOGY -swr_convert_prim_topology(const unsigned mode) -{ - switch (mode) { - case PIPE_PRIM_POINTS: - return TOP_POINT_LIST; - case PIPE_PRIM_LINES: - return TOP_LINE_LIST; - case PIPE_PRIM_LINE_LOOP: - return TOP_LINE_LOOP; - case PIPE_PRIM_LINE_STRIP: - return TOP_LINE_STRIP; - case PIPE_PRIM_TRIANGLES: - 
return TOP_TRIANGLE_LIST; - case PIPE_PRIM_TRIANGLE_STRIP: - return TOP_TRIANGLE_STRIP; - case PIPE_PRIM_TRIANGLE_FAN: - return TOP_TRIANGLE_FAN; - case PIPE_PRIM_QUADS: - return TOP_QUAD_LIST; - case PIPE_PRIM_QUAD_STRIP: - return TOP_QUAD_STRIP; - case PIPE_PRIM_POLYGON: - return TOP_TRIANGLE_FAN; /* XXX TOP_POLYGON; */ - case PIPE_PRIM_LINES_ADJACENCY: - return TOP_LINE_LIST_ADJ; - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - return TOP_LISTSTRIP_ADJ; - case PIPE_PRIM_TRIANGLES_ADJACENCY: - return TOP_TRI_LIST_ADJ; - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - return TOP_TRI_STRIP_ADJ; - default: - assert(0 && "Unknown topology"); - return TOP_UNK
[Mesa-dev] [PATCH v2 0/5] swr: geometry shaders
This patch set implements geometry shaders for the swr driver. Probably still some problems (some hidden by transform feedback bugs), but an additional 1719 piglit tests pass, and the VTK tests using geometry shaders now pass. v2: * swr: remove SWR_NEW_ALL, make new vs dirty gs state * configure.ac: modify commit message to mention scons/automake match * relnotes: mention swr gs addition, updated llvm requirements Tim Rowley (5): configure.ac: increase required swr llvm to 3.9.0 swr: implement geometry shaders swr: [rasterizer core] fix primID provoking vertex for GS docs: update features.txt for swr geometry shaders relnotes: [swr] note addition of gs, increased llvm requirement configure.ac | 2 +- docs/features.txt | 18 +- docs/relnotes/17.1.0.html | 4 + .../drivers/swr/rasterizer/core/frontend.cpp | 4 +- src/gallium/drivers/swr/swr_context.cpp| 2 +- src/gallium/drivers/swr/swr_context.h | 26 +- src/gallium/drivers/swr/swr_draw.cpp | 50 +-- src/gallium/drivers/swr/swr_fence_work.cpp | 21 + src/gallium/drivers/swr/swr_fence_work.h | 3 + src/gallium/drivers/swr/swr_scratch.cpp| 1 + src/gallium/drivers/swr/swr_scratch.h | 1 + src/gallium/drivers/swr/swr_screen.cpp | 6 +- src/gallium/drivers/swr/swr_shader.cpp | 475 - src/gallium/drivers/swr/swr_shader.h | 22 + src/gallium/drivers/swr/swr_state.cpp | 100 - src/gallium/drivers/swr/swr_state.h| 50 +++ src/gallium/drivers/swr/swr_tex_sample.cpp | 6 + 17 files changed, 716 insertions(+), 75 deletions(-) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64
On Fri, Mar 3, 2017 at 2:16 PM, Jason Ekstrand wrote: > Hey Elie! > > On Fri, Mar 3, 2017 at 8:22 AM, Elie Tournier > wrote: >> >> From: Elie Tournier >> >> This series is based on Ian's work about GL_ARB_gpu_shader_int64 [1]. >> The goal is to expose GL_ARB_shader_fp64 to OpenGL 3.0 GPUs. >> >> Each function can be independently tested using shader_runner from piglit. >> The piglit files are stored on github [2]. >> >> [1] >> https://lists.freedesktop.org/archives/mesa-dev/2016-November/136718.html >> [2] https://github.com/Hopetech/libSoftFloat > > > Glad to see this finally turning into code. > > Before, we get too far into things, I'd like to talk about the approach a > bit. First off, if we (Intel) are going to use this on any hardware, we > would really like it to be in NIR. The reason for this is that NIR has a > much more powerful algebraic optimizer than GLSL IR and we would like to > have as few fp64 instructions as possible before we start lowering them to > piles of integer math. I believe Ian's plan for this was that someone would > write a nir_builder back-end for the stand-alone compiler. Unfortunately, > he sort-of left that as "an exercise to the reader" and no code exists to my > knowledge. If we're going to write things in GLSL, we really need that NIR > back-end. I'm not sure what the impetus was for developing a softfloat library (but I'm a big fan). but the current situation is that it will largely just be useful for AMD Evergreen/Northern Islands chips, which consume TGSI produced from GLSL. (Aside: [1].) As such, I'm not sure if a push towards NIR is warranted -- it would cause a more convoluted path towards the intended target. I do agree with the larger point - the lowering should be done as late as possible in order to enable algebraic-style optimizations. 
(This is also why I've argued that optimizing in the frontend is too early - it should all just be done in the backend, as additional calculations can easily make their way into the flow. I realize that's impractical for i965, though, as the backend is not SSA, and some opts are necessary in GLSL in order to perform the necessary validation.) Cheers, -ilia [1] There's also an effort currently underway to implement proper accuracy fp64 rcp/rsq/sqrt for Fermi and newer chips, but that will likely end up as library functions in codegen, esp in part because it will make use of nvidia-specific shader opcodes. I guess this may be useful for the NVIDIA G200 chip to be able to expose ARB_gpu_shader_fp64 (as it only supports addition and multiplication natively), but I doubt there's a lot of demand for that. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler
On 3 March 2017 at 18:42, Mauro Rossi wrote: >> ok .. now I got some fixes for this here: >> >> https://github.com/tpalli/external-mesa/commits/move_compiler >> >> but yeah .. more work required :/ I don't understand why adding >> MESA_GEN_GLSL_H to LOCAL_GENERATED_SOURCES does not help. > > Hi Emil, Tapani, > > The problem is just that the LOCAL_C_INCLUDES for new library are > missing one line that was present in > src/mesa/drivers/dri/i965/Android.mk, the following one: > >$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,)/glsl > Yeah, noticed a similar one missing on the automake/autoconf side. Jason, how would you like the fixes to squash - separate patches, branch with separate "squash !..." or a branch with them squashed already ? -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] EGL/Android: Add EGL_EXT_buffer_age extension
On 3 March 2017 at 08:30, Tapani Pälli wrote: > As discussed offline, this passes all dEQP tests for the extension on > Android-IA (let's add this to commit message when pushing). > > Reviewed-by: Tapani Pälli > Ok Let's land this. Kalyan promised [over at #android-ia] that the team will follow up with de-duplicating the code in due time. Atm we have code [nearly] identical to this in platform_gbm and platform_wayland. Acked-by: Emil Velikov -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] gallium/targets: don't leave an empty target directory(ies)
Reviewed-by: Matt Turner Thanks Emil! ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Clamp texture buffer size to GL_MAX_TEXTURE_BUFFER_SIZE.
The OpenGL 4.5 specification's description of TexBuffer says: "The number of texels in the texture image is then clamped to an implementation-dependent limit, the value of MAX_TEXTURE_BUFFER_SIZE." We set GL_MAX_TEXTURE_BUFFER_SIZE to 2^27. For buffers with a byte element size, this is the maximum possible size we can encode in SURFACE_STATE. If you bind a buffer object larger than this as a texture buffer object, we'll exceed that limit and hit an isl assert: assert(num_elements <= (1ull << 27)); To fix this, clamp the size in bytes to MaxTextureBufferSize * texel_size. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 8ffbc0a5fd5..e48b1e1d2d1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -695,6 +695,24 @@ brw_update_buffer_texture_surface(struct gl_context *ctx, bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size); } + /* The ARB_texture_buffer_specification says: +* +*"The number of texels in the buffer texture's texel array is given by +* +* floor(<buffer_size> / (<components> * sizeof(<base_type>)), +* +* where <buffer_size> is the size of the buffer object, in basic +* machine units and <components> and <base_type> are the element count +* and base data type for elements, as specified in Table X.1. The +* number of texels in the texel array is then clamped to the +* implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB." +* +* We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride, +* so that when ISL divides by stride to obtain the number of texels, that +* texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
+*/ + size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size); + if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) { _mesa_problem(NULL, "bad format %s for texture buffer\n", _mesa_get_format_name(format)); -- 2.11.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] gallium/targets: don't leave an empty target directory(ies)
Some drivers do not support certain targets - for example nouveau doesn't do VAAPI, while freedreno doesn't do any of the video backends. As such if we enter vdpau when building freedreno/ilo/etc, a vdpau/ folder will be created, an empty library will be built and almost immediately removed, thus leaving an empty vdpau/ folder around. There are two ways to fix this. * add substantial tracking in configure/makefiles so that we never end up in targets/vdpau Downsides: Error prone, as the configure checks and the 'include gallium/drivers/foo/Automake.inc' can easily get out of sync. * remove the folder, if empty, alongside the empty library. Downsides: In the latter case vdpau/ might be empty before the mesa build has started, yet we'll remove it either way. This patch implements the latter option, as the downside isn't that significant, plus the patch is way shorter ;-) v2: use has_drivers to track since TARGET_DRIVERS can contain spaces, hence neither string comparison nor -n/-z works correctly. Cc: Matt Turner Gentoo Bugzilla: https://bugs.gentoo.org/545230 Signed-off-by: Emil Velikov --- Matt, I've dropped your r-b since the method was completely reworked. As a nice bonus we can now drop the silly stderr/stdout redirection and the "|| true" part. Previously we needed them both. --- src/gallium/targets/dri/Makefile.am | 4 +++- src/gallium/targets/vdpau/Makefile.am | 4 +++- src/gallium/targets/xvmc/Makefile.am | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am index bca747faa4..bcbf5dfa9b 100644 --- a/src/gallium/targets/dri/Makefile.am +++ b/src/gallium/targets/dri/Makefile.am @@ -141,10 +141,12 @@ endif # gallium_dri.so in the set of final installed files.
install-data-hook: for i in $(TARGET_DRIVERS); do \ + has_drivers=1; \ ln -f $(DESTDIR)$(dridir)/gallium_dri.so\ $(DESTDIR)$(dridir)/$${i}_dri.so; \ done; \ - $(RM) $(DESTDIR)$(dridir)/gallium_dri.* + $(RM) $(DESTDIR)$(dridir)/gallium_dri.*;\ + test $${has_drivers} -eq 1 || $(RM) -d $(DESTDIR)$(dridir) uninstall-hook: for i in $(TARGET_DRIVERS); do \ diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am index 97c0ab27f5..a0f310bc59 100644 --- a/src/gallium/targets/vdpau/Makefile.am +++ b/src/gallium/targets/vdpau/Makefile.am @@ -108,6 +108,7 @@ endif install-data-hook: $(AM_V_GEN)dest_dir=$(DESTDIR)/$(vdpaudir); \ for i in $(TARGET_DRIVERS); do \ + has_drivers=1; \ j=libvdpau_gallium.$(LIB_EXT); \ k=libvdpau_$${i}.$(LIB_EXT);\ l=$${k}.$(VDPAU_MAJOR).$(VDPAU_MINOR).0;\ @@ -120,7 +121,8 @@ install-data-hook: ln -sf $${l}\ $${dest_dir}/$${k}; \ done; \ - $(RM) $${dest_dir}/libvdpau_gallium.* + $(RM) $${dest_dir}/libvdpau_gallium.*; \ + test $${has_drivers} -eq 1 || $(RM) -d $${dest_dir} uninstall-hook: for i in $(TARGET_DRIVERS); do \ diff --git a/src/gallium/targets/xvmc/Makefile.am b/src/gallium/targets/xvmc/Makefile.am index c8bac58961..b65a191eab 100644 --- a/src/gallium/targets/xvmc/Makefile.am +++ b/src/gallium/targets/xvmc/Makefile.am @@ -70,6 +70,7 @@ endif install-data-hook: $(AM_V_GEN)dest_dir=$(DESTDIR)/$(xvmcdir); \ for i in $(TARGET_DRIVERS); do \ + has_drivers=1; \ j=libXvMCgallium.$(LIB_EXT);\ k=libXvMC$${i}.$(LIB_EXT); \ l=$${k}.$(XVMC_MAJOR).$(XVMC_MINOR).0; \ @@ -82,7 +83,8 @@ install-data-hook: ln -sf $${l}\ $${dest_dir}/$${k}; \ done; \ - $(RM) $${dest_dir}/libXvMCgallium.* + $(RM) $${dest_dir}/libXvMCgallium.*;\ + test $${has_drivers} -eq 1 || $(RM) -d $${dest_dir} uninstall-hook: for i in $(TARGET_DRIVERS); do \ -- 2.11.1 ___ mesa
Re: [Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64
One other comment. I'm not sure if you've seen it but, if you haven't, you should check out what Connor and the Igalia guys already did for NIR: https://cgit.freedesktop.org/mesa/mesa/tree/src/compiler/nir/nir_lower_double_ops.c It's not full soft-float but there's some very nice algorithms in there for things such as rcp. On Fri, Mar 3, 2017 at 11:16 AM, Jason Ekstrand wrote: > Hey Elie! > > On Fri, Mar 3, 2017 at 8:22 AM, Elie Tournier > wrote: > >> From: Elie Tournier >> >> This series is based on Ian's work about GL_ARB_gpu_shader_int64 [1]. >> The goal is to expose GL_ARB_shader_fp64 to OpenGL 3.0 GPUs. >> >> Each function can be independently tested using shader_runner from piglit. >> The piglit files are stored on github [2]. >> >> [1] https://lists.freedesktop.org/archives/mesa-dev/2016-Novembe >> r/136718.html >> [2] https://github.com/Hopetech/libSoftFloat >> > > Glad to see this finally turning into code. > > Before, we get too far into things, I'd like to talk about the approach a > bit. First off, if we (Intel) are going to use this on any hardware, we > would really like it to be in NIR. The reason for this is that NIR has a > much more powerful algebraic optimizer than GLSL IR and we would like to > have as few fp64 instructions as possible before we start lowering them to > piles of integer math. I believe Ian's plan for this was that someone > would write a nir_builder back-end for the stand-alone compiler. > Unfortunately, he sort-of left that as "an exercise to the reader" and no > code exists to my knowledge. If we're going to write things in GLSL, we > really need that NIR back-end. > > When implemneting int64 (which needs similar lowering) for the Vulkan > driver, I took the opportunity to try doing it directly in nir_builder > instead of writing back-end code for the stand-alone compiler. All in all, > I'm fairly happy with the result. 
You can find my (almost finished) branch > here: > > https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/nir-int64 > > This approach had several advantages: > > 1. The compiler does less work. Loops can be automatically unrolled, you > can choose to use select instead of control-flow, it doesn't generate > functions that have to be inlined, etc. Now, in GLSL IR, using functions > may actually be a requirement because it's a tree-based IR and adding stuff > to the middle of the tree can be tricky. Also, I'm pretty sure they're a > requirement for control-flow. NIR is flat so it's a bit nicer in that > regard. > > 2. It doesn't require additional compiler infrastructure for converting > GLSL to compiler code. We've gone back-and-forth over the years about how > much is too much codegen. At one point, the build process built the GLSL > compiler and used it to compile GLSL to compiler code for the built-ins and > then built that into the compiler. The build system for doing this was a > mess. The result was that Eric wrote ir_builder and all the code was moved > over to that. A quick look at eiether GLSL IR or NIR will show you that we > haven't completely rejected codegen but one always has to ask if it's > really the best solution. Running the stand-alone compiler to generate > code and then checking it in isn't a terrible solution, but it does seem > like at it could be a least one too many levels of abstraction. > > 3. It's actually less code. The nir_builder code is approximately 50% > larger than the GLSL code but, because you don't have to add built-in > functions and do all of the other plumbing per-opcode, it actually ends up > being smaller. Due to the way vectorization is handled (see next point), > it also involves a lot less infastructure in the lowering pass. Also, it > doesn't need 750 lines of standalone compiler code. > > 4. Because I used the "split" pack/unpack opcodes and bcsel instead of > "if", everything vectorizes automatically. 
It turns a i64vec4 iadd, for > instance, into a bunch of ivec4 operations and kicks out a i32vec4 result > in the end without ever splitting into 4 int64's. (The one exception to > this is the if statement in the division lowering which required a little > special care). This means that we don't have to carry extra code to split > all "dvec4" values into 4 "double" values because it gets handled by the > normal nir_alu_to_scalar pass that we already have. Also, because it uses > entirely vector instructions, it can work on an entire dvec4 at a time on > vec4 hardware (all geometry stages on Intel Haswell and earlier). This > should make it about 4x as fast on vec4 hardware. > > The downside, of course, to writing it nir_builder was that I duplicated > Ian's GLSL IR pass. I'm not a fan of duplicating code but, if int64 on > gen8+ was all I cared about, I think the end result is nice enough that I > don't really care about the code duplication. If, on the other hand, we're > going to have full int64 and fp64 lowering and want to provide both in both > IR's, then maybe
Re: [Mesa-dev] [PATCH] genxml: Depend on Makefile.am for generated sources.
On 2 March 2017 at 19:06, Matt Turner wrote: > Depending on the generated Makefile means that all generated sources are > recreated after ./configure. > --- > src/intel/Makefile.genxml.am | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am > index 2ed2741..20e4b15 100644 > --- a/src/intel/Makefile.genxml.am > +++ b/src/intel/Makefile.genxml.am > @@ -39,7 +39,7 @@ $(GENXML_GENERATED_FILES): genxml/gen_pack_header.py > # prefer to generate our own name here, so it doesn't vary from > # in/out-of-tree builds. > > -$(GENXML_GENERATED_FILES): Makefile > +$(GENXML_GENERATED_FILES): Makefile.am > Yes please Reviewed-by: Emil Velikov -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64
Hey Elie! On Fri, Mar 3, 2017 at 8:22 AM, Elie Tournier wrote: > From: Elie Tournier > > This series is based on Ian's work about GL_ARB_gpu_shader_int64 [1]. > The goal is to expose GL_ARB_shader_fp64 to OpenGL 3.0 GPUs. > > Each function can be independently tested using shader_runner from piglit. > The piglit files are stored on github [2]. > > [1] https://lists.freedesktop.org/archives/mesa-dev/2016- > November/136718.html > [2] https://github.com/Hopetech/libSoftFloat > Glad to see this finally turning into code. Before we get too far into things, I'd like to talk about the approach a bit. First off, if we (Intel) are going to use this on any hardware, we would really like it to be in NIR. The reason for this is that NIR has a much more powerful algebraic optimizer than GLSL IR and we would like to have as few fp64 instructions as possible before we start lowering them to piles of integer math. I believe Ian's plan for this was that someone would write a nir_builder back-end for the stand-alone compiler. Unfortunately, he sort-of left that as "an exercise to the reader" and no code exists to my knowledge. If we're going to write things in GLSL, we really need that NIR back-end. When implementing int64 (which needs similar lowering) for the Vulkan driver, I took the opportunity to try doing it directly in nir_builder instead of writing back-end code for the stand-alone compiler. All in all, I'm fairly happy with the result. You can find my (almost finished) branch here: https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/nir-int64 This approach had several advantages: 1. The compiler does less work. Loops can be automatically unrolled, you can choose to use select instead of control-flow, it doesn't generate functions that have to be inlined, etc. Now, in GLSL IR, using functions may actually be a requirement because it's a tree-based IR and adding stuff to the middle of the tree can be tricky. Also, I'm pretty sure they're a requirement for control-flow.
NIR is flat so it's a bit nicer in that regard. 2. It doesn't require additional compiler infrastructure for converting GLSL to compiler code. We've gone back-and-forth over the years about how much is too much codegen. At one point, the build process built the GLSL compiler and used it to compile GLSL to compiler code for the built-ins and then built that into the compiler. The build system for doing this was a mess. The result was that Eric wrote ir_builder and all the code was moved over to that. A quick look at either GLSL IR or NIR will show you that we haven't completely rejected codegen but one always has to ask if it's really the best solution. Running the stand-alone compiler to generate code and then checking it in isn't a terrible solution, but it does seem like it could be at least one too many levels of abstraction. 3. It's actually less code. The nir_builder code is approximately 50% larger than the GLSL code but, because you don't have to add built-in functions and do all of the other plumbing per-opcode, it actually ends up being smaller. Due to the way vectorization is handled (see next point), it also involves a lot less infrastructure in the lowering pass. Also, it doesn't need 750 lines of standalone compiler code. 4. Because I used the "split" pack/unpack opcodes and bcsel instead of "if", everything vectorizes automatically. It turns a i64vec4 iadd, for instance, into a bunch of ivec4 operations and kicks out a i32vec4 result in the end without ever splitting into 4 int64's. (The one exception to this is the if statement in the division lowering which required a little special care). This means that we don't have to carry extra code to split all "dvec4" values into 4 "double" values because it gets handled by the normal nir_alu_to_scalar pass that we already have. Also, because it uses entirely vector instructions, it can work on an entire dvec4 at a time on vec4 hardware (all geometry stages on Intel Haswell and earlier).
This should make it about 4x as fast on vec4 hardware. The downside, of course, to writing it nir_builder was that I duplicated Ian's GLSL IR pass. I'm not a fan of duplicating code but, if int64 on gen8+ was all I cared about, I think the end result is nice enough that I don't really care about the code duplication. If, on the other hand, we're going to have full int64 and fp64 lowering and want to provide both in both IR's, then maybe we should reconsider. :-) It's worth noting that, without adding more GLSL built-ins for the split pack/unpack opcodes, point 4 above will always be a problem if we use GLSL as the base language. One solution is to just do it in NIR and tell people that, if they want the lowering, they need to support NIR. Surprisingly, I'm not the one who is going to push too hard for this approach. If we can come up with a reasonable way to do it in both, I'm moderately ok with doing so if it isn't too much pain. Another solution that has
Re: [Mesa-dev] [RFC 07/11] glsl: Add "built-in" functions to do mul(fp64, fp64)
On Fri, Mar 3, 2017 at 10:51 AM, tournier.elie wrote: > On 3 March 2017 at 17:46, Eric Engestrom wrote: >> On Friday, 2017-03-03 16:23:03 +, Elie Tournier wrote: >>> Signed-off-by: Elie Tournier >>> --- >>> src/compiler/glsl/builtin_float64.h | 2558 >>> +++ >>> src/compiler/glsl/builtin_functions.cpp |4 + >>> src/compiler/glsl/builtin_functions.h |3 + >>> src/compiler/glsl/float64.glsl | 172 +++ >>> 4 files changed, 2737 insertions(+) >>> >>> diff --git a/src/compiler/glsl/builtin_float64.h >>> b/src/compiler/glsl/builtin_float64.h >>> index bf0953e5d6..0a363bd27a 100644 >>> --- a/src/compiler/glsl/builtin_float64.h >>> +++ b/src/compiler/glsl/builtin_float64.h >> [snip] >>> + >>> +/* THEN INSTRUCTIONS */ >>> +body.instructions = &f0EF9->then_instructions; >>> + >>> +ir_constant_data r0EFD_data; >>> +memset(&r0EFD_data, 0, sizeof(ir_constant_data)); >>> +r0EFD_data.u[0] = 4294967295; >>> +r0EFD_data.u[1] = 4294967295; >> >> Looks like some debug/testing left-overs? > > No. It's the IR representation of "return uvec2(0xFFFFFFFFu, 0xFFFFFFFFu);" Ah, it just looks like debugging code because it's not indented properly. Looks like a bug in the compiler. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] android: fix outdir for gen_enum_to_str files
2017-03-03 11:52 GMT+01:00 Tapani Pälli : > when files are being generated the value of $intermediates var content can be > completely random, this makes sure that outdir is the wanted one. The value of intermediates variable is local to the module and is set at the line: intermediates := $(call local-generated-sources-dir) For confirmation, why is $(vulkan_api_xml) variable ok and $(intermediates) not ok in the generation rules? Mauro > > Fixes: 3f2cb699 ("android: vulkan: add support for libmesa_vulkan_util") > Signed-off-by: Tapani Pälli > --- > src/vulkan/Android.mk | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/vulkan/Android.mk b/src/vulkan/Android.mk > index 9f71d8f..7653f34 100644 > --- a/src/vulkan/Android.mk > +++ b/src/vulkan/Android.mk > @@ -1,4 +1,5 @@ > # Copyright © 2017 Mauro Rossi > +# Copyright © 2017 Intel Corporation > # > # Permission is hereby granted, free of charge, to any person obtaining a > # copy of this software and associated documentation files (the "Software"), > @@ -45,7 +46,7 @@ vulkan_api_xml = $(MESA_TOP)/src/vulkan/registry/vk.xml > $(LOCAL_GENERATED_SOURCES): $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py > $(vulkan_api_xml) > @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" > @mkdir -p $(dir $@) > - $(hide) $(MESA_PYTHON2) > $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py --xml $(vulkan_api_xml) > --outdir $(intermediates)/util > + $(hide) $(MESA_PYTHON2) > $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py --xml $(vulkan_api_xml) > --outdir $(dir $@) > > LOCAL_EXPORT_C_INCLUDE_DIRS := \ > $(intermediates) > -- > 2.9.3 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC 07/11] glsl: Add "built-in" functions to do mul(fp64, fp64)
On 3 March 2017 at 17:46, Eric Engestrom wrote: > On Friday, 2017-03-03 16:23:03 +, Elie Tournier wrote: >> Signed-off-by: Elie Tournier >> --- >> src/compiler/glsl/builtin_float64.h | 2558 >> +++ >> src/compiler/glsl/builtin_functions.cpp |4 + >> src/compiler/glsl/builtin_functions.h |3 + >> src/compiler/glsl/float64.glsl | 172 +++ >> 4 files changed, 2737 insertions(+) >> >> diff --git a/src/compiler/glsl/builtin_float64.h >> b/src/compiler/glsl/builtin_float64.h >> index bf0953e5d6..0a363bd27a 100644 >> --- a/src/compiler/glsl/builtin_float64.h >> +++ b/src/compiler/glsl/builtin_float64.h > [snip] >> + >> +/* THEN INSTRUCTIONS */ >> +body.instructions = &f0EF9->then_instructions; >> + >> +ir_constant_data r0EFD_data; >> +memset(&r0EFD_data, 0, sizeof(ir_constant_data)); >> +r0EFD_data.u[0] = 4294967295; >> +r0EFD_data.u[1] = 4294967295; > > Looks like some debug/testing left-overs? No. It's the IR representation of "return uvec2(0xFFFFFFFFu, 0xFFFFFFFFu);" Multiplying 0 by Inf (or Inf by 0) is an invalid operation. So when we have this kind of operation, we return a NaN. IEEE 754: " 7.2 Invalid operation For operations producing results in floating-point format, the default result of an operation that signals the invalid operation exception shall be a quiet NaN that should provide some diagnostic information. " > > [snip] >> + >> + /* THEN INSTRUCTIONS */ >> + body.instructions = &f0F22->then_instructions; >> + >> + ir_constant_data r0F26_data; >> + memset(&r0F26_data, 0, sizeof(ir_constant_data)); >> +r0F26_data.u[0] = 4294967295; >> +r0F26_data.u[1] = 4294967295; > > Ditto ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler
> ok .. now I got some fixes for this here: > > https://github.com/tpalli/external-mesa/commits/move_compiler > > but yeah .. more work required :/ I don't understand why adding > MESA_GEN_GLSL_H to LOCAL_GENERATED_SOURCES does not help. Hi Emil, Tapani, The problem is just that the LOCAL_C_INCLUDES for new library are missing one line that was present in src/mesa/drivers/dri/i965/Android.mk, the following one: $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,)/glsl Mauro ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Moving i965 driver to genxml commands and structures
On Friday, March 3, 2017 10:27:51 AM PST Kenneth Graunke wrote: > On Friday, March 3, 2017 10:16:57 AM PST Jason Ekstrand wrote: > > Also, you can find most of the gen4-5 XML here: > > > > https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/blorp-gen4 > > > > That should save you some typing. :-) > > I typed up Gen4-4.5 XML a few months ago, and Jason fixed a bunch of > things, but we never landed it as we weren't using it for anything yet. > I fixed a few more things beyond the above branch, and pushed what we > have to master. Hopefully it saves you some effort. > > Gen5 XML is still missing. The Gen5 docs are in really bad shape, > so it's probably trickier. > > Also, here's the code I started in case it's useful as a reference > (it definitely won't apply anymore): > https://cgit.freedesktop.org/~kwg/mesa/log/?h=brwxml > > and also the code Lionel started a while back, for reference: > https://github.com/djdeath/mesa/commits/i965-genxml Also...in case the gen4-4.5 XML I pushed *isn't* useful, and you already had a better version typed up...feel free to throw it out and replace it with your copy. --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Moving i965 driver to genxml commands and structures
On Friday, March 3, 2017 10:16:57 AM PST Jason Ekstrand wrote: > Also, you can find most of the gen4-5 XML here: > > https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/blorp-gen4 > > That should save you some typing. :-) I typed up Gen4-4.5 XML a few months ago, and Jason fixed a bunch of things, but we never landed it as we weren't using it for anything yet. I fixed a few more things beyond the above branch, and pushed what we have to master. Hopefully it saves you some effort. Gen5 XML is still missing. The Gen5 docs are in really bad shape, so it's probably trickier. Also, here's the code I started in case it's useful as a reference (it definitely won't apply anymore): https://cgit.freedesktop.org/~kwg/mesa/log/?h=brwxml and also the code Lionel started a while back, for reference: https://github.com/djdeath/mesa/commits/i965-genxml signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Moving i965 driver to genxml commands and structures
Also, you can find most of the gen4-5 XML here: https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/blorp-gen4 That should save you some typing. :-) On Fri, Mar 3, 2017 at 8:57 AM, Jason Ekstrand wrote: > Lois-Francis, > > Thanks for taking a crack at this! There are a couple other people (I put > them in the Cc) who have talked about doing this so thank you for sending > the announcement! > > My primary request as you dive into this would be that you do things in > such a way that we can have a fairly tight feedback loop. I think both Ken > and myself have a pretty clear idea of what things should look like when > you're done and I'd like to avoid any chance that you send a 50 patch > series and then get told to go back through the whole thing and make some > mechanical change. > > One other comment: look at blorp. It's already using genxml with the i965 > driver although there's a but of an abstraction layer there. It should > give you a decent idea about how to make a brw_emit() macro by combining > things and removing the abstraction. > > --Jason > > Sent with AquaMail for Android > http://www.aqua-mail.com > > > > On March 3, 2017 6:41:59 AM Louis-Francis Ratté-Boulianne < > l...@collabora.com> wrote: > > Hi, >> >> As to avoid any duplicate work, I want to inform everyone that I'm in >> the process of modifying the i965 driver so that it uses the same >> genxml infrastructure that the one used by the Intel Vulkan driver. The >> task has been proposed by Jason Ekstrand. >> >> I will probably post my first patchset today that will contain the >> basic call to replace the BEGIN_BATCH/ADVANCE_BATCH macros and the XML >> specification files for generations 4, 4.5 and 5. >> >> Let me know if you have any question or suggestion related to this >> work. >> >> -- >> Louis-Francis Ratté-Boulianne >> > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] util/disk_cache: support caches for multiple architectures
On Fri, 2017-03-03 at 12:24 +1100, Timothy Arceri wrote: > On 03/03/17 11:53, Marek Olšák wrote: > > > > OK. > > > > I also wonder if 1GB isn't too conservative. Today’s games take up > > a > > lot of space. My installed games occupy 480 GB. I could certainly > > spare 10 GB for a shader cache if it improves gaming experience. > > For > > example, my ccache size is set to 27 GB, because 1 or 5 or 10 GB > > wasn't enough for my use case. I assume some gamers would have a > > similar attitude. > > Yeah I agree that 1GB is probably too small. This was set by Carl > before > we even knew how much data we needed to cache. > > I'm happy to set it at 4GB which would be a possible 8GB total. > > We may need to cap it at 4GB for some platforms anyway, or at least > figure out a work around for this: > https://bugs.freedesktop.org/show_bug.cgi?id=93089 I wouldn't say that 1G was too small currently as, for example, the cache for shader heavy DeusEx:MD is ~50M compressed per your commit message. There is the mythical quote of 640K being enough but how many games and applications do you need cached at once? A more relevant issue would then be the random eviction rather than using LRU eviction. However perhaps we could dynamically scale by checking statvfs and quotactl to choose MAX[1G, MIN[10% user home filesystem, 10% user home quota]]? -- Alan. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC 07/11] glsl: Add "built-in" functions to do mul(fp64, fp64)
On Friday, 2017-03-03 16:23:03 +, Elie Tournier wrote: > Signed-off-by: Elie Tournier > --- > src/compiler/glsl/builtin_float64.h | 2558 > +++ > src/compiler/glsl/builtin_functions.cpp |4 + > src/compiler/glsl/builtin_functions.h |3 + > src/compiler/glsl/float64.glsl | 172 +++ > 4 files changed, 2737 insertions(+) > > diff --git a/src/compiler/glsl/builtin_float64.h > b/src/compiler/glsl/builtin_float64.h > index bf0953e5d6..0a363bd27a 100644 > --- a/src/compiler/glsl/builtin_float64.h > +++ b/src/compiler/glsl/builtin_float64.h [snip] > + > +/* THEN INSTRUCTIONS */ > +body.instructions = &f0EF9->then_instructions; > + > +ir_constant_data r0EFD_data; > +memset(&r0EFD_data, 0, sizeof(ir_constant_data)); > +r0EFD_data.u[0] = 4294967295; > +r0EFD_data.u[1] = 4294967295; Looks like some debug/testing left-overs? [snip] > + > + /* THEN INSTRUCTIONS */ > + body.instructions = &f0F22->then_instructions; > + > + ir_constant_data r0F26_data; > + memset(&r0F26_data, 0, sizeof(ir_constant_data)); > +r0F26_data.u[0] = 4294967295; > +r0F26_data.u[1] = 4294967295; Ditto ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Any updates on threaded GL dispatch?
On Fri, 3 Mar 2017 16:46:24 +0100 Marek Olšák wrote: > On Fri, Mar 3, 2017 at 10:19 AM, Timothy Arceri wrote: > > On 02/03/17 22:18, Marek Olšák wrote: > >> > >> The bad news is my involvement is currently on hold due to other > >> projects and responsibilities. > > > > > > I can probably spend some time on this. Seems like Gregory has taken care of > > most of the problems and it just needs someone to push it over the line. > > There are also plenty of unresolved review comments from Emil and others. > > Marek Yes, I only fixed the piglit bad/crash regression on Nouveau. I also added the perf optimization for PCSX2 ;) I did a basic test of EGL (i.e. PCSX2) and it seems to work. By the way, I don't know how costly the remaining synchronization is, but there are 2 potential optimizations: * glUniform for double seems to sync whereas standard floats are asynchronous. Maybe it misses the scale parameter in XML. I guess double was added after glthread. It might be worth checking the behavior of glProgramUniform (introduced by SSO) too. * GL3 glClearBuffer functions are synchronous due to the pointer to the single pixel value. However, there may be tricks to find the correct size of the pixel. Cheers, Gregory ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC 02/11] glsl: Add "built-in" function to do neg(fp64)
On Fri, Mar 3, 2017 at 11:57 AM, tournier.elie wrote: > On 3 March 2017 at 16:29, Ilia Mirkin wrote: >> On Fri, Mar 3, 2017 at 11:22 AM, Elie Tournier >> wrote: >>> Signed-off-by: Elie Tournier >>> --- >>> src/compiler/glsl/builtin_float64.h | 19 +++ >>> src/compiler/glsl/builtin_functions.cpp | 4 >>> src/compiler/glsl/builtin_functions.h | 3 +++ >>> src/compiler/glsl/float64.glsl | 10 ++ >>> 4 files changed, 36 insertions(+) >>> >>> diff --git a/src/compiler/glsl/builtin_float64.h >>> b/src/compiler/glsl/builtin_float64.h >>> index c1ec89d210..6df91e10f5 100644 >>> --- a/src/compiler/glsl/builtin_float64.h >>> +++ b/src/compiler/glsl/builtin_float64.h >>> @@ -17,3 +17,22 @@ fabs64(void *mem_ctx, builtin_available_predicate avail) >>> sig->replace_parameters(&sig_parameters); >>> return sig; >>> } >>> +ir_function_signature * >>> +fneg64(void *mem_ctx, builtin_available_predicate avail) >>> +{ >>> + ir_function_signature *const sig = >>> + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); >>> + ir_factory body(&sig->body, mem_ctx); >>> + sig->is_defined = true; >>> + >>> + exec_list sig_parameters; >>> + >>> + ir_variable *const r000C = new(mem_ctx) >>> ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); >>> + sig_parameters.push_tail(r000C); >>> + body.emit(assign(r000C, bit_xor(swizzle_x(r000C), >>> body.constant(2147483648u)), 0x01)); >>> + >>> + body.emit(ret(r000C)); >>> + >>> + sig->replace_parameters(&sig_parameters); >>> + return sig; >>> +} >>> diff --git a/src/compiler/glsl/builtin_functions.cpp >>> b/src/compiler/glsl/builtin_functions.cpp >>> index b0b1781725..a189b84190 100644 >>> --- a/src/compiler/glsl/builtin_functions.cpp >>> +++ b/src/compiler/glsl/builtin_functions.cpp >>> @@ -3133,6 +3133,10 @@ builtin_builder::create_builtins() >>> generate_ir::fabs64(mem_ctx, integer_functions_supported), >>> NULL); >>> >>> + add_function("__builtin_fneg64", >>> +generate_ir::fneg64(mem_ctx, integer_functions_supported), >>> +NULL); 
>>> + >>> #undef F >>> #undef FI >>> #undef FIUD_VEC >>> diff --git a/src/compiler/glsl/builtin_functions.h >>> b/src/compiler/glsl/builtin_functions.h >>> index abe02d97b6..37c6cc33c2 100644 >>> --- a/src/compiler/glsl/builtin_functions.h >>> +++ b/src/compiler/glsl/builtin_functions.h >>> @@ -66,6 +66,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail); >>> ir_function_signature * >>> fabs64(void *mem_ctx, builtin_available_predicate avail); >>> >>> +ir_function_signature * >>> +fneg64(void *mem_ctx, builtin_available_predicate avail); >>> + >>> } >>> >>> #endif /* BULITIN_FUNCTIONS_H */ >>> diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl >>> index b8f0c2e444..82875e9407 100644 >>> --- a/src/compiler/glsl/float64.glsl >>> +++ b/src/compiler/glsl/float64.glsl >>> @@ -26,3 +26,13 @@ fabs64( uvec2 a ) >>> a.x &= 0x7FFFu; >>> return a; >>> } >>> + >>> +/* Negate value of a Float64 : >>> + * Toggle the sign bit >>> + */ >>> +uvec2 >>> +fneg64( uvec2 a ) >>> +{ >>> +a.x ^= (1u<<31); >> >> Is this right for NaN? Presumably neg(NaN) should == NaN. > > The IEEE 754 standard say : > > " 6.3 The sign bit > > When either an input or result is NaN, this standard does not > interpret the sign of a NaN. Note, however, > that operations on bit strings copy, negate, abs, copySign specify the > sign bit of a NaN result, > sometimes based upon the sign bit of a NaN operand. The logical > predicate totalOrder is also affected by > the sign bit of a NaN operand. For all other operations, this standard > does not specify the sign bit of a NaN > result, even when there is only one input NaN, or when the NaN is > produced from an invalid operation. " > > So neg(NaN) == NaN Right ... I guess I meant that there's a certain amount of normalization that's required to be applied to results of floating point operations. I was wondering if the sign bit of a NaN had to be cleared, so that it would be in canonical form. 
(Just like you'd set all of the mantissa bits, even though setting any of them results in a NaN when exp == 0x7ff.) You've been digging in the FP64 standard a lot more than I have, so if you think it's acceptable to have a fp64 function return a NaN with the sign bit set, that's fine by me. -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Moving i965 driver to genxml commands and structures
Louis-Francis, Thanks for taking a crack at this! There are a couple other people (I put them in the Cc) who have talked about doing this so thank you for sending the announcement! My primary request as you dive into this would be that you do things in such a way that we can have a fairly tight feedback loop. I think both Ken and myself have a pretty clear idea of what things should look like when you're done and I'd like to avoid any chance that you send a 50 patch series and then get told to go back through the whole thing and make some mechanical change. One other comment: look at blorp. It's already using genxml with the i965 driver although there's a bit of an abstraction layer there. It should give you a decent idea about how to make a brw_emit() macro by combining things and removing the abstraction. --Jason Sent with AquaMail for Android http://www.aqua-mail.com On March 3, 2017 6:41:59 AM Louis-Francis Ratté-Boulianne wrote: Hi, To avoid any duplicate work, I want to inform everyone that I'm in the process of modifying the i965 driver so that it uses the same genxml infrastructure as the one used by the Intel Vulkan driver. The task has been proposed by Jason Ekstrand. I will probably post my first patchset today that will contain the basic call to replace the BEGIN_BATCH/ADVANCE_BATCH macros and the XML specification files for generations 4, 4.5 and 5. Let me know if you have any questions or suggestions related to this work. -- Louis-Francis Ratté-Boulianne ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC 02/11] glsl: Add "built-in" function to do neg(fp64)
On 3 March 2017 at 16:29, Ilia Mirkin wrote: > On Fri, Mar 3, 2017 at 11:22 AM, Elie Tournier > wrote: >> Signed-off-by: Elie Tournier >> --- >> src/compiler/glsl/builtin_float64.h | 19 +++ >> src/compiler/glsl/builtin_functions.cpp | 4 >> src/compiler/glsl/builtin_functions.h | 3 +++ >> src/compiler/glsl/float64.glsl | 10 ++ >> 4 files changed, 36 insertions(+) >> >> diff --git a/src/compiler/glsl/builtin_float64.h >> b/src/compiler/glsl/builtin_float64.h >> index c1ec89d210..6df91e10f5 100644 >> --- a/src/compiler/glsl/builtin_float64.h >> +++ b/src/compiler/glsl/builtin_float64.h >> @@ -17,3 +17,22 @@ fabs64(void *mem_ctx, builtin_available_predicate avail) >> sig->replace_parameters(&sig_parameters); >> return sig; >> } >> +ir_function_signature * >> +fneg64(void *mem_ctx, builtin_available_predicate avail) >> +{ >> + ir_function_signature *const sig = >> + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); >> + ir_factory body(&sig->body, mem_ctx); >> + sig->is_defined = true; >> + >> + exec_list sig_parameters; >> + >> + ir_variable *const r000C = new(mem_ctx) >> ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); >> + sig_parameters.push_tail(r000C); >> + body.emit(assign(r000C, bit_xor(swizzle_x(r000C), >> body.constant(2147483648u)), 0x01)); >> + >> + body.emit(ret(r000C)); >> + >> + sig->replace_parameters(&sig_parameters); >> + return sig; >> +} >> diff --git a/src/compiler/glsl/builtin_functions.cpp >> b/src/compiler/glsl/builtin_functions.cpp >> index b0b1781725..a189b84190 100644 >> --- a/src/compiler/glsl/builtin_functions.cpp >> +++ b/src/compiler/glsl/builtin_functions.cpp >> @@ -3133,6 +3133,10 @@ builtin_builder::create_builtins() >> generate_ir::fabs64(mem_ctx, integer_functions_supported), >> NULL); >> >> + add_function("__builtin_fneg64", >> +generate_ir::fneg64(mem_ctx, integer_functions_supported), >> +NULL); >> + >> #undef F >> #undef FI >> #undef FIUD_VEC >> diff --git a/src/compiler/glsl/builtin_functions.h >> 
b/src/compiler/glsl/builtin_functions.h >> index abe02d97b6..37c6cc33c2 100644 >> --- a/src/compiler/glsl/builtin_functions.h >> +++ b/src/compiler/glsl/builtin_functions.h >> @@ -66,6 +66,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail); >> ir_function_signature * >> fabs64(void *mem_ctx, builtin_available_predicate avail); >> >> +ir_function_signature * >> +fneg64(void *mem_ctx, builtin_available_predicate avail); >> + >> } >> >> #endif /* BULITIN_FUNCTIONS_H */ >> diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl >> index b8f0c2e444..82875e9407 100644 >> --- a/src/compiler/glsl/float64.glsl >> +++ b/src/compiler/glsl/float64.glsl >> @@ -26,3 +26,13 @@ fabs64( uvec2 a ) >> a.x &= 0x7FFFu; >> return a; >> } >> + >> +/* Negate value of a Float64 : >> + * Toggle the sign bit >> + */ >> +uvec2 >> +fneg64( uvec2 a ) >> +{ >> +a.x ^= (1u<<31); > > Is this right for NaN? Presumably neg(NaN) should == NaN. The IEEE 754 standard say : " 6.3 The sign bit When either an input or result is NaN, this standard does not interpret the sign of a NaN. Note, however, that operations on bit strings copy, negate, abs, copySign specify the sign bit of a NaN result, sometimes based upon the sign bit of a NaN operand. The logical predicate totalOrder is also affected by the sign bit of a NaN operand. For all other operations, this standard does not specify the sign bit of a NaN result, even when there is only one input NaN, or when the NaN is produced from an invalid operation. " So neg(NaN) == NaN > >> +return a; >> +} >> -- >> 2.11.0 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/4] swr: implement geometry shaders
Set looks great with a couple suggestions: * In swr_update_derived() it seems like the GS should also depend on SWR_NEW_VS dirty since swr_generate_gs_key() references ctx->vs->info.base. *In swr_context.h the SWR_NEW_ALL mask should be expanded by 2-bits to account for the new dirty flags. Or, just remove SWR_NEW_ALL since it’s a construct we never used. Bruce > On Mar 2, 2017, at 7:17 PM, Tim Rowley wrote: > > --- > src/gallium/drivers/swr/swr_context.cpp| 2 +- > src/gallium/drivers/swr/swr_context.h | 25 +- > src/gallium/drivers/swr/swr_draw.cpp | 50 +-- > src/gallium/drivers/swr/swr_fence_work.cpp | 21 ++ > src/gallium/drivers/swr/swr_fence_work.h | 3 + > src/gallium/drivers/swr/swr_scratch.cpp| 1 + > src/gallium/drivers/swr/swr_scratch.h | 1 + > src/gallium/drivers/swr/swr_screen.cpp | 6 +- > src/gallium/drivers/swr/swr_shader.cpp | 475 - > src/gallium/drivers/swr/swr_shader.h | 22 ++ > src/gallium/drivers/swr/swr_state.cpp | 99 +- > src/gallium/drivers/swr/swr_state.h| 50 +++ > src/gallium/drivers/swr/swr_tex_sample.cpp | 6 + > 13 files changed, 699 insertions(+), 62 deletions(-) > > diff --git a/src/gallium/drivers/swr/swr_context.cpp > b/src/gallium/drivers/swr/swr_context.cpp > index b89ce1b..1c98ac2 100644 > --- a/src/gallium/drivers/swr/swr_context.cpp > +++ b/src/gallium/drivers/swr/swr_context.cpp > @@ -307,7 +307,7 @@ swr_blit(struct pipe_context *pipe, const struct > pipe_blit_info *blit_info) >util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer); >util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems); >util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs); > - /*util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);*/ > + util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs); >util_blitter_save_so_targets( > ctx->blitter, > ctx->num_so_targets, > diff --git a/src/gallium/drivers/swr/swr_context.h > b/src/gallium/drivers/swr/swr_context.h > index 04e11fe..624e069 100644 > --- 
a/src/gallium/drivers/swr/swr_context.h > +++ b/src/gallium/drivers/swr/swr_context.h > @@ -40,15 +40,17 @@ > #define SWR_NEW_SAMPLER_VIEW (1 << 4) > #define SWR_NEW_VS (1 << 5) > #define SWR_NEW_FS (1 << 6) > -#define SWR_NEW_VSCONSTANTS (1 << 7) > -#define SWR_NEW_FSCONSTANTS (1 << 8) > -#define SWR_NEW_VERTEX (1 << 9) > -#define SWR_NEW_STIPPLE (1 << 10) > -#define SWR_NEW_SCISSOR (1 << 11) > -#define SWR_NEW_VIEWPORT (1 << 12) > -#define SWR_NEW_FRAMEBUFFER (1 << 13) > -#define SWR_NEW_CLIP (1 << 14) > -#define SWR_NEW_SO (1 << 15) > +#define SWR_NEW_GS (1 << 7) > +#define SWR_NEW_VSCONSTANTS (1 << 8) > +#define SWR_NEW_FSCONSTANTS (1 << 9) > +#define SWR_NEW_GSCONSTANTS (1 << 10) > +#define SWR_NEW_VERTEX (1 << 11) > +#define SWR_NEW_STIPPLE (1 << 12) > +#define SWR_NEW_SCISSOR (1 << 13) > +#define SWR_NEW_VIEWPORT (1 << 14) > +#define SWR_NEW_FRAMEBUFFER (1 << 15) > +#define SWR_NEW_CLIP (1 << 16) > +#define SWR_NEW_SO (1 << 17) > #define SWR_NEW_ALL 0x > > namespace std > @@ -85,11 +87,15 @@ struct swr_draw_context { >uint32_t num_constantsVS[PIPE_MAX_CONSTANT_BUFFERS]; >const float *constantFS[PIPE_MAX_CONSTANT_BUFFERS]; >uint32_t num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS]; > + const float *constantGS[PIPE_MAX_CONSTANT_BUFFERS]; > + uint32_t num_constantsGS[PIPE_MAX_CONSTANT_BUFFERS]; > >swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; >swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS]; >swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; >swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS]; > + swr_jit_texture texturesGS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; > + swr_jit_sampler samplersGS[PIPE_MAX_SAMPLERS]; > >float userClipPlanes[PIPE_MAX_CLIP_PLANES][4]; > > @@ -112,6 +118,7 @@ struct swr_context { > >struct swr_vertex_shader *vs; >struct swr_fragment_shader *fs; > + struct swr_geometry_shader *gs; >struct swr_vertex_element_state *velems; > >/** Other rendering state */ > diff --git a/src/gallium/drivers/swr/swr_draw.cpp > 
b/src/gallium/drivers/swr/swr_draw.cpp > index f764efe..c43f4a5 100644 > --- a/src/gallium/drivers/swr/swr_draw.cpp > +++ b/src/gallium/drivers/swr/swr_draw.cpp > @@ -32,48 +32,6 @@ > #include "util/u_prim.h" > > /* > - * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY > - */ > -static INLINE enum PRIMITIVE_TOPOLOGY > -swr_convert_prim_topology(const unsigned mode) > -{ > - switch (mode) { > - case PIPE_PRIM_POINTS: > - return TOP_POINT_LIST; > - case PIPE_PRIM_LINES: > - return TOP_LINE_LIST; > - case PIPE_PRIM_LINE_LOOP: > - return TOP_LINE_LOOP; > - case PIPE_PRIM_LINE_STRIP: > - return TOP_LINE_STRIP; > - case PIPE_PRIM_TRIANGLES: > - return TOP_TRIANGLE_LIST; > - case PIPE_PRIM_TRIANGLE_STRIP: > - return TOP_TRIANGLE_STRIP; > - case PIPE_PRIM_TRIANG
Re: [Mesa-dev] [RFC 02/11] glsl: Add "built-in" function to do neg(fp64)
On Fri, Mar 3, 2017 at 11:22 AM, Elie Tournier wrote: > Signed-off-by: Elie Tournier > --- > src/compiler/glsl/builtin_float64.h | 19 +++ > src/compiler/glsl/builtin_functions.cpp | 4 > src/compiler/glsl/builtin_functions.h | 3 +++ > src/compiler/glsl/float64.glsl | 10 ++ > 4 files changed, 36 insertions(+) > > diff --git a/src/compiler/glsl/builtin_float64.h > b/src/compiler/glsl/builtin_float64.h > index c1ec89d210..6df91e10f5 100644 > --- a/src/compiler/glsl/builtin_float64.h > +++ b/src/compiler/glsl/builtin_float64.h > @@ -17,3 +17,22 @@ fabs64(void *mem_ctx, builtin_available_predicate avail) > sig->replace_parameters(&sig_parameters); > return sig; > } > +ir_function_signature * > +fneg64(void *mem_ctx, builtin_available_predicate avail) > +{ > + ir_function_signature *const sig = > + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); > + ir_factory body(&sig->body, mem_ctx); > + sig->is_defined = true; > + > + exec_list sig_parameters; > + > + ir_variable *const r000C = new(mem_ctx) > ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); > + sig_parameters.push_tail(r000C); > + body.emit(assign(r000C, bit_xor(swizzle_x(r000C), > body.constant(2147483648u)), 0x01)); > + > + body.emit(ret(r000C)); > + > + sig->replace_parameters(&sig_parameters); > + return sig; > +} > diff --git a/src/compiler/glsl/builtin_functions.cpp > b/src/compiler/glsl/builtin_functions.cpp > index b0b1781725..a189b84190 100644 > --- a/src/compiler/glsl/builtin_functions.cpp > +++ b/src/compiler/glsl/builtin_functions.cpp > @@ -3133,6 +3133,10 @@ builtin_builder::create_builtins() > generate_ir::fabs64(mem_ctx, integer_functions_supported), > NULL); > > + add_function("__builtin_fneg64", > +generate_ir::fneg64(mem_ctx, integer_functions_supported), > +NULL); > + > #undef F > #undef FI > #undef FIUD_VEC > diff --git a/src/compiler/glsl/builtin_functions.h > b/src/compiler/glsl/builtin_functions.h > index abe02d97b6..37c6cc33c2 100644 > --- 
a/src/compiler/glsl/builtin_functions.h > +++ b/src/compiler/glsl/builtin_functions.h > @@ -66,6 +66,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail); > ir_function_signature * > fabs64(void *mem_ctx, builtin_available_predicate avail); > > +ir_function_signature * > +fneg64(void *mem_ctx, builtin_available_predicate avail); > + > } > > #endif /* BULITIN_FUNCTIONS_H */ > diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl > index b8f0c2e444..82875e9407 100644 > --- a/src/compiler/glsl/float64.glsl > +++ b/src/compiler/glsl/float64.glsl > @@ -26,3 +26,13 @@ fabs64( uvec2 a ) > a.x &= 0x7FFFu; > return a; > } > + > +/* Negate value of a Float64 : > + * Toggle the sign bit > + */ > +uvec2 > +fneg64( uvec2 a ) > +{ > +a.x ^= (1u<<31); Is this right for NaN? Presumably neg(NaN) should == NaN. > +return a; > +} > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 10/11] glsl: Add "built-in" functions to do fp64_to_fp32(fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 589 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 133 4 files changed, 729 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index b50ebc2dc2..dad5811289 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -23652,3 +23652,592 @@ fp32_to_fp64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +packFloat32(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r13A7 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zSign", ir_var_function_in); + sig_parameters.push_tail(r13A7); + ir_variable *const r13A8 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zExp", ir_var_function_in); + sig_parameters.push_tail(r13A8); + ir_variable *const r13A9 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFrac", ir_var_function_in); + sig_parameters.push_tail(r13A9); + ir_expression *const r13AA = lshift(r13A7, body.constant(int(31))); + ir_expression *const r13AB = lshift(r13A8, body.constant(int(23))); + ir_expression *const r13AC = add(r13AA, r13AB); + ir_expression *const r13AD = add(r13AC, r13A9); + body.emit(ret(r13AD)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +shift32RightJamming(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::void_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r13AE = new(mem_ctx) 
ir_variable(glsl_type::uint_type, "a", ir_var_function_in); + sig_parameters.push_tail(r13AE); + ir_variable *const r13AF = new(mem_ctx) ir_variable(glsl_type::int_type, "count", ir_var_function_in); + sig_parameters.push_tail(r13AF); + ir_variable *const r13B0 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zPtr", ir_var_function_inout); + sig_parameters.push_tail(r13B0); + ir_variable *const r13B1 = new(mem_ctx) ir_variable(glsl_type::uint_type, "z", ir_var_auto); + body.emit(r13B1); + /* IF CONDITION */ + ir_expression *const r13B3 = equal(r13AF, body.constant(int(0))); + ir_if *f13B2 = new(mem_ctx) ir_if(operand(r13B3).val); + exec_list *const f13B2_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f13B2->then_instructions; + + body.emit(assign(r13B1, r13AE, 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f13B2->else_instructions; + + /* IF CONDITION */ + ir_expression *const r13B5 = less(r13AF, body.constant(int(32))); + ir_if *f13B4 = new(mem_ctx) ir_if(operand(r13B5).val); + exec_list *const f13B4_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f13B4->then_instructions; + + ir_expression *const r13B6 = rshift(r13AE, r13AF); + ir_expression *const r13B7 = neg(r13AF); + ir_expression *const r13B8 = bit_and(r13B7, body.constant(int(31))); + ir_expression *const r13B9 = lshift(r13AE, r13B8); + ir_expression *const r13BA = nequal(r13B9, body.constant(0u)); + ir_expression *const r13BB = expr(ir_unop_b2i, r13BA); + ir_expression *const r13BC = expr(ir_unop_i2u, r13BB); + body.emit(assign(r13B1, bit_or(r13B6, r13BC), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f13B4->else_instructions; + + ir_expression *const r13BD = nequal(r13AE, body.constant(0u)); + ir_expression *const r13BE = expr(ir_unop_b2i, r13BD); + body.emit(assign(r13B1, expr(ir_unop_i2u, r13BE), 0x01)); + + + body.instructions = f13B4_parent_instructions; + body.emit(f13B4); + + /* END 
IF */ + + + body.instructions = f13B2_parent_instructions; + body.emit(f13B2); + + /* END IF */ + + body.emit(assign(r13B0, r13B1, 0x01)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +roundAndPackFloat32(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r13BF = new(mem_ctx) ir_variable(glsl_type::uint_type, "zSign", ir_var_function_in); + sig_parameters.push_tail(r13BF); + ir_va
[Mesa-dev] [RFC 11/11] glsl: Add fp64 functions to the parser.
Since we use two integers to store our fp64, the functions are available with MESA_shader_integer_functions. Signed-off-by: Elie Tournier --- src/compiler/glsl/glcpp/glcpp-parse.y | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index e113253061..84dfc81c80 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -2349,6 +2349,16 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "__have_builtin_builtin_umod64", 1); add_builtin_define(parser, "__have_builtin_builtin_idiv64", 1); add_builtin_define(parser, "__have_builtin_builtin_imod64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fabs64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fneg64", 1); + add_builtin_define(parser, "__have_builtin_builtin_feq64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fle64", 1); + add_builtin_define(parser, "__have_builtin_builtin_flt64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fadd64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fmul64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fdiv64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fp32_to_fp64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fp64_to_fp32", 1); } } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3] i965: Add script to gen code for OA counter queries
On 2 March 2017 at 21:25, Robert Bragg wrote: > > > On Mar 2, 2017 7:32 PM, "Emil Velikov" wrote: > > On 2 March 2017 at 18:58, Robert Bragg wrote: >> Adds R/b from Dylan and Makefile fixups from Emil, including fixing race >> with >> parallel make builds (thanks). Just holding fast on the use of #pragma >> once >> though :-) >> > I think you want the "required=True" for all the fields, but feel free > to do at a later stage. > > > The header and code args are checked like: > >> if args.header: >>header_file = open(args.header, 'w') >> >> if args.code: >>c_file = open(args.code, 'w') > Mostly food for thought, feel free to _not_ reply. - why would we want to generate only the header and not the source file or vice versa? - we can call the generator with no output files - i.e. both --header and --code can be empty, right ? - what is going to happen if we do not pass the xml filename as an arg ? -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 02/11] glsl: Add "built-in" function to do neg(fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 19 +++ src/compiler/glsl/builtin_functions.cpp | 4 src/compiler/glsl/builtin_functions.h | 3 +++ src/compiler/glsl/float64.glsl | 10 ++ 4 files changed, 36 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index c1ec89d210..6df91e10f5 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -17,3 +17,22 @@ fabs64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +fneg64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r000C = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r000C); + body.emit(assign(r000C, bit_xor(swizzle_x(r000C), body.constant(2147483648u)), 0x01)); + + body.emit(ret(r000C)); + + sig->replace_parameters(&sig_parameters); + return sig; +} diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index b0b1781725..a189b84190 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3133,6 +3133,10 @@ builtin_builder::create_builtins() generate_ir::fabs64(mem_ctx, integer_functions_supported), NULL); + add_function("__builtin_fneg64", +generate_ir::fneg64(mem_ctx, integer_functions_supported), +NULL); + #undef F #undef FI #undef FIUD_VEC diff --git a/src/compiler/glsl/builtin_functions.h b/src/compiler/glsl/builtin_functions.h index abe02d97b6..37c6cc33c2 100644 --- a/src/compiler/glsl/builtin_functions.h +++ b/src/compiler/glsl/builtin_functions.h @@ -66,6 +66,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail); ir_function_signature * fabs64(void 
*mem_ctx, builtin_available_predicate avail); +ir_function_signature * +fneg64(void *mem_ctx, builtin_available_predicate avail); + } #endif /* BULITIN_FUNCTIONS_H */ diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl index b8f0c2e444..82875e9407 100644 --- a/src/compiler/glsl/float64.glsl +++ b/src/compiler/glsl/float64.glsl @@ -26,3 +26,13 @@ fabs64( uvec2 a ) a.x &= 0x7FFFu; return a; } + +/* Negate value of a Float64 : + * Toggle the sign bit + */ +uvec2 +fneg64( uvec2 a ) +{ +a.x ^= (1u<<31); +return a; +} -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 04/11] glsl: Add "built-in" functions to do le(fp64, fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 179 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 52 ++ 4 files changed, 238 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index e614374d75..f8ceacdabf 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -155,3 +155,182 @@ feq64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +extractFloat64Sign(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r002D = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r002D); + ir_expression *const r002E = rshift(swizzle_x(r002D), body.constant(int(31))); + body.emit(ret(r002E)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +le64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r002F = new(mem_ctx) ir_variable(glsl_type::uint_type, "a0", ir_var_function_in); + sig_parameters.push_tail(r002F); + ir_variable *const r0030 = new(mem_ctx) ir_variable(glsl_type::uint_type, "a1", ir_var_function_in); + sig_parameters.push_tail(r0030); + ir_variable *const r0031 = new(mem_ctx) ir_variable(glsl_type::uint_type, "b0", ir_var_function_in); + sig_parameters.push_tail(r0031); + ir_variable *const r0032 = new(mem_ctx) ir_variable(glsl_type::uint_type, "b1", ir_var_function_in); + 
sig_parameters.push_tail(r0032); + ir_expression *const r0033 = less(r002F, r0031); + ir_expression *const r0034 = equal(r002F, r0031); + ir_expression *const r0035 = lequal(r0030, r0032); + ir_expression *const r0036 = logic_and(r0034, r0035); + ir_expression *const r0037 = logic_or(r0033, r0036); + body.emit(ret(r0037)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +fle64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0038 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0038); + ir_variable *const r0039 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "b", ir_var_function_in); + sig_parameters.push_tail(r0039); + ir_variable *const r003A = body.make_temp(glsl_type::bool_type, "return_value"); + ir_variable *const r003B = new(mem_ctx) ir_variable(glsl_type::bool_type, "isbNaN", ir_var_auto); + body.emit(r003B); + ir_variable *const r003C = new(mem_ctx) ir_variable(glsl_type::bool_type, "isaNaN", ir_var_auto); + body.emit(r003C); + ir_variable *const r003D = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r003D, bit_and(swizzle_x(r0038), body.constant(1048575u)), 0x01)); + + body.emit(assign(r003D, swizzle_y(r0038), 0x02)); + + ir_variable *const r003E = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r003E, bit_and(swizzle_x(r0039), body.constant(1048575u)), 0x01)); + + body.emit(assign(r003E, swizzle_y(r0039), 0x02)); + + ir_expression *const r003F = rshift(swizzle_x(r0038), body.constant(int(20))); + ir_expression *const r0040 = bit_and(r003F, body.constant(2047u)); + ir_expression *const r0041 = equal(r0040, body.constant(2047u)); + ir_expression *const r0042 = bit_or(swizzle_x(r003D), 
swizzle_y(r0038)); + ir_expression *const r0043 = nequal(r0042, body.constant(0u)); + body.emit(assign(r003C, logic_and(r0041, r0043), 0x01)); + + ir_expression *const r0044 = rshift(swizzle_x(r0039), body.constant(int(20))); + ir_expression *const r0045 = bit_and(r0044, body.constant(2047u)); + ir_expression *const r0046 = equal(r0045, body.constant(2047u)); + ir_expression *const r0047 = bit_or(swizzle_x(r003E), swizzle_y(r0039)); + ir_expression *const r0048 = nequal(r0047, body.constant(0u)); + body.emit(assign(r003B, logic_and(r0046, r0048), 0x01)); + + /* IF CONDITION */ + ir_expression *const r004A = logic_or(r003C, r003B); + ir_if *f0049 = new(mem_ctx) ir_if(operand(r004A).val); + exec_list *const f0049_parent_instructions = body.instructions; + + /* TH
[Mesa-dev] [RFC 03/11] glsl: Add "built-in" functions to do eq(fp64, fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 119 src/compiler/glsl/builtin_functions.cpp | 4 ++ src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 41 +++ 4 files changed, 167 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 6df91e10f5..e614374d75 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -36,3 +36,122 @@ fneg64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +extractFloat64Frac(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r000D = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r000D); + ir_variable *const r000E = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r000E, bit_and(swizzle_x(r000D), body.constant(1048575u)), 0x01)); + + body.emit(assign(r000E, swizzle_y(r000D), 0x02)); + + body.emit(ret(r000E)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +extractFloat64Exp(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r000F = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r000F); + ir_expression *const r0010 = rshift(swizzle_x(r000F), body.constant(int(20))); + ir_expression *const r0011 = bit_and(r0010, body.constant(2047u)); + body.emit(ret(r0011)); + + sig->replace_parameters(&sig_parameters); + return 
sig; +} +ir_function_signature * +feq64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0012 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0012); + ir_variable *const r0013 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "b", ir_var_function_in); + sig_parameters.push_tail(r0013); + ir_variable *const r0014 = body.make_temp(glsl_type::bool_type, "return_value"); + ir_variable *const r0015 = new(mem_ctx) ir_variable(glsl_type::bool_type, "isbNaN", ir_var_auto); + body.emit(r0015); + ir_variable *const r0016 = new(mem_ctx) ir_variable(glsl_type::bool_type, "isaNaN", ir_var_auto); + body.emit(r0016); + ir_variable *const r0017 = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r0017, bit_and(swizzle_x(r0012), body.constant(1048575u)), 0x01)); + + body.emit(assign(r0017, swizzle_y(r0012), 0x02)); + + ir_variable *const r0018 = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r0018, bit_and(swizzle_x(r0013), body.constant(1048575u)), 0x01)); + + body.emit(assign(r0018, swizzle_y(r0013), 0x02)); + + ir_expression *const r0019 = rshift(swizzle_x(r0012), body.constant(int(20))); + ir_expression *const r001A = bit_and(r0019, body.constant(2047u)); + ir_expression *const r001B = equal(r001A, body.constant(2047u)); + ir_expression *const r001C = bit_or(swizzle_x(r0017), swizzle_y(r0012)); + ir_expression *const r001D = nequal(r001C, body.constant(0u)); + body.emit(assign(r0016, logic_and(r001B, r001D), 0x01)); + + ir_expression *const r001E = rshift(swizzle_x(r0013), body.constant(int(20))); + ir_expression *const r001F = bit_and(r001E, body.constant(2047u)); + ir_expression *const r0020 = equal(r001F, body.constant(2047u)); + ir_expression *const 
r0021 = bit_or(swizzle_x(r0018), swizzle_y(r0013)); + ir_expression *const r0022 = nequal(r0021, body.constant(0u)); + body.emit(assign(r0015, logic_and(r0020, r0022), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0024 = logic_or(r0016, r0015); + ir_if *f0023 = new(mem_ctx) ir_if(operand(r0024).val); + exec_list *const f0023_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0023->then_instructions; + + body.emit(assign(r0014, body.constant(false), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0023->else_instructions; + + ir_expression *const r0025 = equal(swizzle_y(r0012), swizzle_y(r0013)); + ir_expression *const r0026 = equal(swizzle_x(r0012), swizzle_x(r0013)); +
[Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64
From: Elie Tournier This series is based on Ian's work on GL_ARB_gpu_shader_int64 [1]. The goal is to expose GL_ARB_gpu_shader_fp64 to OpenGL 3.0 GPUs. Each function can be independently tested using shader_runner from piglit. The piglit files are stored on github [2]. [1] https://lists.freedesktop.org/archives/mesa-dev/2016-November/136718.html [2] https://github.com/Hopetech/libSoftFloat Elie Tournier (11): glsl: Add "built-in" function to do abs(fp64) glsl: Add "built-in" function to do neg(fp64) glsl: Add "built-in" functions to do eq(fp64,fp64) glsl: Add "built-in" functions to do le(fp64,fp64) glsl: Add "built-in" functions to do lt(fp64,fp64) glsl: Add "built-in" functions to do add(fp64,fp64) glsl: Add "built-in" functions to do mul(fp64,fp64) glsl: Add "built-in" functions to do div(fp64,fp64) glsl: Add "built-in" functions to do fp32_to_fp64(fp32) glsl: Add "built-in" functions to do fp64_to_fp32(fp64) glsl: Add fp64 functions to the parser. src/compiler/Makefile.sources | 1 + src/compiler/glsl/builtin_float64.h | 24243 ++ src/compiler/glsl/builtin_functions.cpp |40 + src/compiler/glsl/builtin_functions.h |30 + src/compiler/glsl/float64.glsl | 1378 ++ src/compiler/glsl/generate_ir.cpp | 1 + src/compiler/glsl/glcpp/glcpp-parse.y |10 + 7 files changed, 25703 insertions(+) create mode 100644 src/compiler/glsl/builtin_float64.h create mode 100644 src/compiler/glsl/float64.glsl -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 09/11] glsl: Add "built-in" functions to do fp32_to_fp64(fp32)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 490 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 77 + 4 files changed, 574 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index a795d404c1..b50ebc2dc2 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -23162,3 +23162,493 @@ r1189_data.u[1] = 4294967295; sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +normalizeFloat32Subnormal(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::void_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r1354 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aFrac", ir_var_function_in); + sig_parameters.push_tail(r1354); + ir_variable *const r1355 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zExpPtr", ir_var_function_inout); + sig_parameters.push_tail(r1355); + ir_variable *const r1356 = new(mem_ctx) ir_variable(glsl_type::uint_type, "zFracPtr", ir_var_function_inout); + sig_parameters.push_tail(r1356); + ir_variable *const r1357 = new(mem_ctx) ir_variable(glsl_type::uint_type, "shiftCount", ir_var_auto); + body.emit(r1357); + ir_variable *const r1358 = body.make_temp(glsl_type::uint_type, "a"); + body.emit(assign(r1358, r1354, 0x01)); + + ir_variable *const r1359 = body.make_temp(glsl_type::uint_type, "return_value"); + ir_variable *const r135A = new(mem_ctx) ir_variable(glsl_type::uint_type, "shiftCount", ir_var_auto); + body.emit(r135A); + /* IF CONDITION */ + ir_expression *const r135C = equal(r1354, body.constant(0u)); + ir_if *f135B = new(mem_ctx) ir_if(operand(r135C).val); + exec_list *const f135B_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions 
= &f135B->then_instructions; + + body.emit(assign(r1359, body.constant(32u), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f135B->else_instructions; + + body.emit(assign(r135A, body.constant(0u), 0x01)); + + /* IF CONDITION */ + ir_expression *const r135E = bit_and(r1354, body.constant(4294901760u)); + ir_expression *const r135F = equal(r135E, body.constant(0u)); + ir_if *f135D = new(mem_ctx) ir_if(operand(r135F).val); + exec_list *const f135D_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f135D->then_instructions; + + body.emit(assign(r135A, body.constant(16u), 0x01)); + + body.emit(assign(r1358, lshift(r1354, body.constant(int(16))), 0x01)); + + + body.instructions = f135D_parent_instructions; + body.emit(f135D); + + /* END IF */ + + /* IF CONDITION */ + ir_expression *const r1361 = bit_and(r1358, body.constant(4278190080u)); + ir_expression *const r1362 = equal(r1361, body.constant(0u)); + ir_if *f1360 = new(mem_ctx) ir_if(operand(r1362).val); + exec_list *const f1360_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f1360->then_instructions; + + body.emit(assign(r135A, add(r135A, body.constant(8u)), 0x01)); + + body.emit(assign(r1358, lshift(r1358, body.constant(int(8))), 0x01)); + + + body.instructions = f1360_parent_instructions; + body.emit(f1360); + + /* END IF */ + + /* IF CONDITION */ + ir_expression *const r1364 = bit_and(r1358, body.constant(4026531840u)); + ir_expression *const r1365 = equal(r1364, body.constant(0u)); + ir_if *f1363 = new(mem_ctx) ir_if(operand(r1365).val); + exec_list *const f1363_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f1363->then_instructions; + + body.emit(assign(r135A, add(r135A, body.constant(4u)), 0x01)); + + body.emit(assign(r1358, lshift(r1358, body.constant(int(4))), 0x01)); + + + body.instructions = f1363_parent_instructions; + body.emit(f1363); + + /* END IF */ + + /* IF 
CONDITION */ + ir_expression *const r1367 = bit_and(r1358, body.constant(3221225472u)); + ir_expression *const r1368 = equal(r1367, body.constant(0u)); + ir_if *f1366 = new(mem_ctx) ir_if(operand(r1368).val); + exec_list *const f1366_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f1366->then_instructions; + + body.emit(assign(r135A, add(r135A, body.constant(2u)), 0x01)); + + body.emit(assign(r1358, lshift(r1358, body.constant(int(2))), 0x01)); + + + body.instructions = f1366_parent_instructions; + body.emit(f13
[Mesa-dev] [RFC 05/11] glsl: Add "built-in" functions to do lt(fp64, fp64)
Signed-off-by: Elie Tournier --- src/compiler/glsl/builtin_float64.h | 161 src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 45 + 4 files changed, 213 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index f8ceacdabf..e825536466 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -334,3 +334,164 @@ fle64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +lt64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0060 = new(mem_ctx) ir_variable(glsl_type::uint_type, "a0", ir_var_function_in); + sig_parameters.push_tail(r0060); + ir_variable *const r0061 = new(mem_ctx) ir_variable(glsl_type::uint_type, "a1", ir_var_function_in); + sig_parameters.push_tail(r0061); + ir_variable *const r0062 = new(mem_ctx) ir_variable(glsl_type::uint_type, "b0", ir_var_function_in); + sig_parameters.push_tail(r0062); + ir_variable *const r0063 = new(mem_ctx) ir_variable(glsl_type::uint_type, "b1", ir_var_function_in); + sig_parameters.push_tail(r0063); + ir_expression *const r0064 = less(r0060, r0062); + ir_expression *const r0065 = equal(r0060, r0062); + ir_expression *const r0066 = less(r0061, r0063); + ir_expression *const r0067 = logic_and(r0065, r0066); + ir_expression *const r0068 = logic_or(r0064, r0067); + body.emit(ret(r0068)); + + sig->replace_parameters(&sig_parameters); + return sig; +} +ir_function_signature * +flt64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail); + ir_factory body(&sig->body, mem_ctx); 
+ sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r0069 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r0069); + ir_variable *const r006A = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "b", ir_var_function_in); + sig_parameters.push_tail(r006A); + ir_variable *const r006B = body.make_temp(glsl_type::bool_type, "return_value"); + ir_variable *const r006C = new(mem_ctx) ir_variable(glsl_type::bool_type, "isbNaN", ir_var_auto); + body.emit(r006C); + ir_variable *const r006D = new(mem_ctx) ir_variable(glsl_type::bool_type, "isaNaN", ir_var_auto); + body.emit(r006D); + ir_variable *const r006E = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r006E, bit_and(swizzle_x(r0069), body.constant(1048575u)), 0x01)); + + body.emit(assign(r006E, swizzle_y(r0069), 0x02)); + + ir_variable *const r006F = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + body.emit(assign(r006F, bit_and(swizzle_x(r006A), body.constant(1048575u)), 0x01)); + + body.emit(assign(r006F, swizzle_y(r006A), 0x02)); + + ir_expression *const r0070 = rshift(swizzle_x(r0069), body.constant(int(20))); + ir_expression *const r0071 = bit_and(r0070, body.constant(2047u)); + ir_expression *const r0072 = equal(r0071, body.constant(2047u)); + ir_expression *const r0073 = bit_or(swizzle_x(r006E), swizzle_y(r0069)); + ir_expression *const r0074 = nequal(r0073, body.constant(0u)); + body.emit(assign(r006D, logic_and(r0072, r0074), 0x01)); + + ir_expression *const r0075 = rshift(swizzle_x(r006A), body.constant(int(20))); + ir_expression *const r0076 = bit_and(r0075, body.constant(2047u)); + ir_expression *const r0077 = equal(r0076, body.constant(2047u)); + ir_expression *const r0078 = bit_or(swizzle_x(r006F), swizzle_y(r006A)); + ir_expression *const r0079 = nequal(r0078, body.constant(0u)); + body.emit(assign(r006C, logic_and(r0077, r0079), 0x01)); + + /* IF CONDITION */ + ir_expression *const r007B = 
logic_or(r006D, r006C); + ir_if *f007A = new(mem_ctx) ir_if(operand(r007B).val); + exec_list *const f007A_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f007A->then_instructions; + + body.emit(assign(r006B, body.constant(false), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f007A->else_instructions; + + ir_variable *const r007C = body.make_temp(glsl_type::uint_type, "extractFloat64Sign_retval"); + body.emit(assign(r007C, rshift(swizzle_x(r0069), body.constant(int(31))), 0x01)); + + ir_variable *const r007D = body.make_temp(glsl_type::uint_type, "extractFloat64Sign_retval"); + body.emit(assign(r007D, rshift(swizzle_x(r006A), body.constant(int(31))), 0x01)); + + /* IF CONDITION
[Mesa-dev] [RFC 01/11] glsl: Add "built-in" function to do abs(fp64)
Signed-off-by: Elie Tournier --- src/compiler/Makefile.sources | 1 + src/compiler/glsl/builtin_float64.h | 19 +++ src/compiler/glsl/builtin_functions.cpp | 4 src/compiler/glsl/builtin_functions.h | 3 +++ src/compiler/glsl/float64.glsl | 28 src/compiler/glsl/generate_ir.cpp | 1 + 6 files changed, 56 insertions(+) create mode 100644 src/compiler/glsl/builtin_float64.h create mode 100644 src/compiler/glsl/float64.glsl diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 643a0181d8..b67834246f 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -22,6 +22,7 @@ LIBGLSL_FILES = \ glsl/builtin_functions.cpp \ glsl/builtin_functions.h \ glsl/builtin_int64.h \ + glsl/builtin_float64.h \ glsl/builtin_types.cpp \ glsl/builtin_variables.cpp \ glsl/generate_ir.cpp \ diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h new file mode 100644 index 00..c1ec89d210 --- /dev/null +++ b/src/compiler/glsl/builtin_float64.h @@ -0,0 +1,19 @@ +ir_function_signature * +fabs64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r000B = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r000B); + body.emit(assign(r000B, bit_and(swizzle_x(r000B), body.constant(2147483647u)), 0x01)); + + body.emit(ret(r000B)); + + sig->replace_parameters(&sig_parameters); + return sig; +} diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index e03a50c843..b0b1781725 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3129,6 +3129,10 @@ builtin_builder::create_builtins() generate_ir::umul64(mem_ctx, integer_functions_supported), NULL); + 
add_function("__builtin_fabs64", +generate_ir::fabs64(mem_ctx, integer_functions_supported), +NULL); + #undef F #undef FI #undef FIUD_VEC diff --git a/src/compiler/glsl/builtin_functions.h b/src/compiler/glsl/builtin_functions.h index 7ae211b48a..abe02d97b6 100644 --- a/src/compiler/glsl/builtin_functions.h +++ b/src/compiler/glsl/builtin_functions.h @@ -63,6 +63,9 @@ umul64(void *mem_ctx, builtin_available_predicate avail); ir_function_signature * sign64(void *mem_ctx, builtin_available_predicate avail); +ir_function_signature * +fabs64(void *mem_ctx, builtin_available_predicate avail); + } #endif /* BULITIN_FUNCTIONS_H */ diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl new file mode 100644 index 00..b8f0c2e444 --- /dev/null +++ b/src/compiler/glsl/float64.glsl @@ -0,0 +1,28 @@ +/* Compile with: + * + * glsl_compiler --version 130 --dump-builder float64.glsl > builtin_float64.h + * + */ + +#version 130 + +/* Software IEEE floating-point rounding mode. + * GLSL spec section "4.7.1 Range and Precision": + * The rounding mode cannot be set and is undefined. + * But here, we are able to define the rounding mode at the compilation time. + */ +#define FLOAT_ROUND_NEAREST_EVEN0 +#define FLOAT_ROUND_TO_ZERO 1 +#define FLOAT_ROUND_DOWN2 +#define FLOAT_ROUND_UP 3 +#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN + +/* Absolute value of a Float64 : + * Clear the sign bit + */ +uvec2 +fabs64( uvec2 a ) +{ +a.x &= 0x7FFFu; +return a; +} diff --git a/src/compiler/glsl/generate_ir.cpp b/src/compiler/glsl/generate_ir.cpp index 255b0484f2..e6ece4860f 100644 --- a/src/compiler/glsl/generate_ir.cpp +++ b/src/compiler/glsl/generate_ir.cpp @@ -29,5 +29,6 @@ using namespace ir_builder; namespace generate_ir { #include "builtin_int64.h" +#include "builtin_float64.h" } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Any updates on threaded GL dispatch?
On Fri, Mar 3, 2017 at 10:19 AM, Timothy Arceri wrote: > On 02/03/17 22:18, Marek Olšák wrote: >> >> The bad news is my involvement is currently on hold due to other >> projects and responsibilities. > > > I can probably spend some time on this. Seems like Gregory has taken care of > most of the problems and it just needs someone to push it over the line. There are also plenty of unresolved review comments from Emil and others. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 0/7] gallium: Video postprocessor rework
So after an off-line chat with Christian we decided to drop this series, since I don't have the cycles to fix this up. For completeness, if anybody decides at some point to (re)use some of the code, some comments below. On 03/03/2017 01:32 PM, Christian König wrote: > Hi Thomas, > > Am 02.03.2017 um 21:00 schrieb Thomas Hellstrom: >> This patch series introduces a postprocessor abstraction. It could be >> promoted >> to a gallium interface but for now it's implemented as a utility. > > Well, first of all use a gallium interface for this. Putting this into > utility doesn't look like it makes much sense when it is supposed to > be a driver interface. > >> The idea >> is that a driver wanting to implement the whole or part of the >> postprocessing >> pipeline using video hardware can do that. It also significantly >> simplifies >> implementing a future full postprocessor pipeline also in the libVA >> state tracker. >> >> Some highlights: >> - The postprocessor abstraction is very similar to the vl compositor >> API, >> except it add filters. > > NAK on that, we already had that design and it was a pain to support. > > If you want to implement parts of the post processing pipeline in the > driver make each pipeline step a separate object. The compositor API very well suited our needs and was very easy and straightforward to work with. I guess mostly because it resembles the postprocessing pipeline of both VAAPI, VDPAU and DXVA/D3D11. > >> - The bounce buffer cache. Caches buffers where we know more about >> the usage. >> This may duplicate driver functionality but for drivers with strongly >> typed >> buffers, it's a big win. > > That's similar to what I've done for the video buffers, but in general > I think that this doesn't belong into the state tracker. > > The driver should implement buffer reuse if it can do this without > problems, but how many buffers should be in there and when they are > reclaimed is strongly hardware dependent. True. 
Although for drivers with strongly typed buffers, that becomes difficult since caches need to be quite large to facilitate hits. The caching needs for this pipeline are also special in that the number of cached buffers is small and very predictable and there's no need to wait for idle before reusing buffers... > >> - Moving the bicubic filter. Avoids duplicated code and makes things >> like >> rotation and blending straightforward. > > A BIG NAK on that as well. The bicubic filter doesn't work on all > hardware, so creating the shaders all the time while it isn't use is > clearly not an option. > > Additional to that I would rather remove functionality from the > compositor than stuffing even more into it. Compiling on-demand is an easy fix. Having all scaling options in one place logically and physically seemed attractive and helped avoid unwanted behaviour like filtering overlaid or already scaled surfaces and video-background borders, double scaling of overlay surfaces. Also enabled blending and rotation. Thanks, Thomas > >> - A g3dvl implementation. Mostly replicates the old implementation >> using the >> vl compositor, but with provisions to replace part of the postprocessing >> pipeline. >> - State tracker modifications for VDPAU and VA. >> >> - Tested with mplayer -vo vdpau software playback on Nouveau. > > Please test that with radeon hardware as well if possible. Nouveau > doesn't fully support all the functionality and project like Kodi > don't test with it. > > Regards, > Christian. > >> >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=DwICaQ&c=uilaK90D4TOVoH58JNXRgQ&r=wnSlgOCqfpNS4d02vP68_E9q2BNMCwfD2OZ_6dCFVQQ&m=GDt3ZtrRjpa7Se4bsMYXF18gBs6IXAqtSW_3Y3df0Bs&s=-HnyAt6aoAvWpesLw7KAHyyQmiJVv43cJNL7VmIjOh0&e= > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Moving i965 driver to genxml commands and structures
Hi, As to avoid any duplicate work, I want to inform everyone that I'm in the process of modifying the i965 driver so that it uses the same genxml infrastructure that the one used by the Intel Vulkan driver. The task has been proposed by Jason Ekstrand. I will probably post my first patchset today that will contain the basic call to replace the BEGIN_BATCH/ADVANCE_BATCH macros and the XML specification files for generations 4, 4.5 and 5. Let me know if you have any question or suggestion related to this work. -- Louis-Francis Ratté-Boulianne ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler
On 3 March 2017 at 11:50, Tapani Pälli wrote: > > > On 03/03/2017 01:30 PM, Tapani Pälli wrote: >> >> >> >> On 03/03/2017 01:21 PM, Tapani Pälli wrote: >>> >>> >>> >>> On 03/02/2017 03:41 PM, Emil Velikov wrote: Cc: Mauro Rossi Cc: Tapani Pälli Cc: Jason Ekstrand --- All, here is a 5min attempt to fix the Android build. Tapani, Mauro do give it a test since I've done an educated guess here. Pull Jason's branch and apply on top. https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/move-compiler >>> >>> >>> when compiling blorp and isl it says: >>> >>> fatal error: 'brw_compiler.h' file not found >>> >>> this can be fixed by putting 'src/intel/compiler' include path for blorp >>> and isl libraries: >>> >>> >>> https://github.com/tpalli/external-mesa/commit/4ffef80a45bd93731b2d2af0cb532687f11ae8d3 >>> >>> >>> >>> but when linking i965_dri library then there is quite a big explosion of >>> missing symbols, I can take a look at this but a bit later. >> >> >> doh sorry, it looks like I did not apply your patch after all in that >> tree ... ignore the noise, will try again > > > ok .. now I got some fixes for this here: > > https://github.com/tpalli/external-mesa/commits/move_compiler > > but yeah .. more work required :/ I don't understand why adding > MESA_GEN_GLSL_H to LOCAL_GENERATED_SOURCES does not help. > Thanks for the fixup, but I think my earlier suggestion is coming to bite us. We really want to stop the copy/pasta of the existing rules - just move them to a common place and use them. See below for an explicit example. One at a time of course. To resolve the dependency tracking one can use a) libmesa_genxml approach or b) have a short parser in the top-level Android.mk that goes through the BUILT_SOURCES and runs the generation rule(s). -Emil With the risk of sounding like a knob I'll repeat the plan, one more time. 
- move the Makefile.am rule, [as-is] to separate file - where applicable/needed add $srcdir and define that for Android - avoid adding path prefix for the generated files - - above might lead to files being generated in a non !$intermediates path, if so cd/update instructions to ensure they don't end up all over the place. - update CleanSpec.mk if needed - throw the includes into Makefile.sources/other file - ... - profit Here is roughly how things might look - do use better names as you see fit. $ cat src/util/Android.mk LOCAL_PATH := $(call my-dir) # We'll need to use srcdir/top_srcdir/others in the next file # If doing that in MESA_COMMON_MK does not work, because it's too late - use another file and include it here. PYTHON_GEN = $(hide)$(MESA_PYTHON2) include $(LOCAL_PATH)/Makefile.gen // has generation rule(s) include $(LOCAL_PATH)/Makefile.sources // has sources list(s) and C/CPP/CXXFLAGS include $(CLEAR_VARS) LOCAL_SRC_FILES := $(MESA_UTIL_FILES) LOCAL_CFLAGS := $(MESA_CPP_FLAGS) LOCAL_MODULE := libmesa_util LOCAL_MODULE_CLASS := STATIC_LIBRARIES # Generated sources # Files are implicitly added to the LOCAL_SRC_FILES by the Android build system # Generation rules are in Makefile.gen LOCAL_GENERATED_SOURCES := $(MESA_UTIL_GENERATED_FILES) include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) $ cat src/util/Makefile.am PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) include Makefile.gen include Makefile.sources noinst_LTLIBRARIES = libmesautil.la libmesautil_la_CPPFLAGS = $(MESA_CPP_FLAGS) libmesautil_la_SOURCES = \ $(MESA_UTIL_FILES) \ $(MESA_UTIL_GENERATED_FILES) # test/automake specific bits ... $ cat src/util/Makefile.sources MESA_CPP_FLAGS := \ $(DEFINES) \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa \ -I$(top_srcdir)/src/gallium/include \ -I$(top_srcdir)/src/gallium/auxiliary \ $(VISIBILITY_CFLAGS) \ $(MSVC2013_COMPAT_CFLAGS) # existing SOURCES lists ... 
$ cat src/util/Makefile.gen format_srgb.c: $(srcdir)/format_srgb.py $(PYTHON_GEN) $(srcdir)/format_srgb.py > $@ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 100049] "ralloc: Make sure ralloc() allocations match malloc()'s alignment." causes seg fault in 32bit build
https://bugs.freedesktop.org/show_bug.cgi?id=100049 Bug ID: 100049 Summary: "ralloc: Make sure ralloc() allocations match malloc()'s alignment." causes seg fault in 32bit build Product: Mesa Version: git Hardware: Other OS: All Status: NEW Severity: normal Priority: medium Component: Mesa core Assignee: mesa-dev@lists.freedesktop.org Reporter: raffa...@zoho.com QA Contact: mesa-dev@lists.freedesktop.org Created attachment 130046 --> https://bugs.freedesktop.org/attachment.cgi?id=130046&action=edit glxinfo 32bit backtrace commit cd2b55e536dc806f9358f71db438dd9c246cdb14 "ralloc: Make sure ralloc() allocations match malloc()'s alignment." causes segmentation fault in 32bit build with -O3, -O2 works fine. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] android: fix outdir for gen_enum_to_str files
On Friday, 2017-03-03 12:52:56 +0200, Tapani Pälli wrote: > when files are being generated the value of $intermediates var content can be > completely random, this makes sure that outdir is the wanted one. > > Fixes: 3f2cb699 ("android: vulkan: add support for libmesa_vulkan_util") > Signed-off-by: Tapani Pälli Reviewed-by: Eric Engestrom This can probably be applied to a lot of other places; even though it might not be fixing bugs there, it's still cleaner to reuse the var than copying the path. > --- > src/vulkan/Android.mk | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/vulkan/Android.mk b/src/vulkan/Android.mk > index 9f71d8f..7653f34 100644 > --- a/src/vulkan/Android.mk > +++ b/src/vulkan/Android.mk > @@ -1,4 +1,5 @@ > # Copyright © 2017 Mauro Rossi > +# Copyright © 2017 Intel Corporation > # > # Permission is hereby granted, free of charge, to any person obtaining a > # copy of this software and associated documentation files (the "Software"), > @@ -45,7 +46,7 @@ vulkan_api_xml = $(MESA_TOP)/src/vulkan/registry/vk.xml > $(LOCAL_GENERATED_SOURCES): $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py > $(vulkan_api_xml) > @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" > @mkdir -p $(dir $@) > - $(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py > --xml $(vulkan_api_xml) --outdir $(intermediates)/util > + $(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py > --xml $(vulkan_api_xml) --outdir $(dir $@) > > LOCAL_EXPORT_C_INCLUDE_DIRS := \ > $(intermediates) > -- > 2.9.3 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 0/7] gallium: Video postprocessor rework
Hi Thomas, Am 02.03.2017 um 21:00 schrieb Thomas Hellstrom: This patch series introduces a postprocessor abstraction. It could be promoted to a gallium interface but for now it's implemented as a utility. Well, first of all use a gallium interface for this. Putting this into utility doesn't look like it makes much sense when it is supposed to be a driver interface. The idea is that a driver wanting to implement the whole or part of the postprocessing pipeline using video hardware can do that. It also significantly simplifies implementing a future full postprocessor pipeline also in the libVA state tracker. Some highlights: - The postprocessor abstraction is very similar to the vl compositor API, except it add filters. NAK on that, we already had that design and it was a pain to support. If you want to implement parts of the post processing pipeline in the driver make each pipeline step a separate object. - The bounce buffer cache. Caches buffers where we know more about the usage. This may duplicate driver functionality but for drivers with strongly typed buffers, it's a big win. That's similar to what I've done for the video buffers, but in general I think that this doesn't belong into the state tracker. The driver should implement buffer reuse if it can do this without problems, but how many buffers should be in there and when they are reclaimed is strongly hardware dependent. - Moving the bicubic filter. Avoids duplicated code and makes things like rotation and blending straightforward. A BIG NAK on that as well. The bicubic filter doesn't work on all hardware, so creating the shaders all the time while it isn't use is clearly not an option. Additional to that I would rather remove functionality from the compositor than stuffing even more into it. - A g3dvl implementation. Mostly replicates the old implementation using the vl compositor, but with provisions to replace part of the postprocessing pipeline. - State tracker modifications for VDPAU and VA. 
- Tested with mplayer -vo vdpau software playback on Nouveau. Please test that with radeon hardware as well if possible. Nouveau doesn't fully support all the functionality and project like Kodi don't test with it. Regards, Christian. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH V2] util/disk_cache: support caches for multiple architectures
On Fri, Mar 3, 2017 at 5:27 AM, Timothy Arceri wrote: > Previously we were deleting the entire cache if a user switched > between 32 and 64 bit applications. > > V2: make the check more generic, it should now work with any > platform we are likely to support. > --- > src/util/disk_cache.c | 19 +++ > 1 file changed, 19 insertions(+) > > diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c > index 3abdec4..92528a9 100644 > --- a/src/util/disk_cache.c > +++ b/src/util/disk_cache.c > @@ -40,20 +40,30 @@ > #include "zlib.h" > > #include "util/crc32.h" > #include "util/u_atomic.h" > #include "util/mesa-sha1.h" > #include "util/ralloc.h" > #include "main/errors.h" > > #include "disk_cache.h" > > +#if defined(__ILP32__) > +#if defined(__x86_64__) || defined(__arm__) > +#define CACHE_ARCH "ilp-32" > +#else > +#define CACHE_ARCH "32" > +#endif > +#else > +#define CACHE_ARCH "64" > +#endif That reports "64" for me on gcc -m32, I think only clang sets __ILP32__ for non-x32 32bit build. I'd still suggest using sizeof(void *) directly in the code, perhaps within some "const char *get_arch_bitness_string()" helper, that should be more reliable. Gražvydas ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] configure.ac: require pthread-stubs only where available
On 2 March 2017 at 19:34, Gary Wong wrote: > On Thu, Mar 02, 2017 at 07:02:44PM +, Emil Velikov wrote: >> Jeremy, others, >> >> Afaict pthread-stubs expands to a simple .pc on your platforms, but a >> confirmation will be greatly appreciated. > > Hurd is not really "my" platform, but yes, I agree that with its > current libc, pthreads-stubs is merely a dummy .pc. This was not > always the case: > > https://www.gnu.org/software/hurd/open_issues/libpthread_dlopen.html > Right. My train of thought was that even if I got it wrong, people will still have greater knowledge than me where to look ;-) Thanks for the confirmation ! Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/4] configure.ac: increase required swr llvm to 3.9.0
Hi Tim, On 3 March 2017 at 01:16, Tim Rowley wrote: > GS implementation uses the masked.{gather,store} intrinsics, > introduced in llvm-3.9.0. Please mention in the commit message that the SCons build already requires 3.9 or later. Can you add a note about the LLVM requirement and GS support in docs/relnotes/17.1.0.html, with a separate commit on top ? With this we have some ~20 preprocessor conditionals which want to be cleaned up. Look for $ git grep "LLVM_.*VERSION\|HAVE_LLVM" -- src/gallium/drivers/swr/ Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler
On 03/03/2017 01:30 PM, Tapani Pälli wrote: On 03/03/2017 01:21 PM, Tapani Pälli wrote: On 03/02/2017 03:41 PM, Emil Velikov wrote: Cc: Mauro Rossi Cc: Tapani Pälli Cc: Jason Ekstrand --- All, here is a 5min attempt to fix the Android build. Tapani, Mauro do give it a test since I've done an educated guess here. Pull Jason's branch and apply on top. https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/move-compiler when compiling blorp and isl it says: fatal error: 'brw_compiler.h' file not found this can be fixed by putting 'src/intel/compiler' include path for blorp and isl libraries: https://github.com/tpalli/external-mesa/commit/4ffef80a45bd93731b2d2af0cb532687f11ae8d3 but when linking i965_dri library then there is quite a big explosion of missing symbols, I can take a look at this but a bit later. doh sorry, it looks like I did not apply your patch after all in that tree ... ignore the noise, will try again ok .. now I got some fixes for this here: https://github.com/tpalli/external-mesa/commits/move_compiler but yeah .. more work required :/ I don't understand why adding MESA_GEN_GLSL_H to LOCAL_GENERATED_SOURCES does not help. 
--- .../Android.gen.mk => intel/Android.compiler.mk} | 34 ++ src/intel/Android.mk | 1 + src/mesa/drivers/dri/i965/Android.mk | 30 ++- 3 files changed, 31 insertions(+), 34 deletions(-) rename src/{mesa/drivers/dri/i965/Android.gen.mk => intel/Android.compiler.mk} (64%) diff --git a/src/mesa/drivers/dri/i965/Android.gen.mk b/src/intel/Android.compiler.mk similarity index 64% rename from src/mesa/drivers/dri/i965/Android.gen.mk rename to src/intel/Android.compiler.mk index c2b8dae339..2d4ba91396 100644 --- a/src/mesa/drivers/dri/i965/Android.gen.mk +++ b/src/intel/Android.compiler.mk @@ -1,4 +1,7 @@ # +# Copyright (C) 2011 Intel Corporation +# Copyright (C) 2010-2011 Chia-I Wu +# Copyright (C) 2010-2011 LunarG # Copyright (C) 2016 Linaro, Ltd., Rob Herring # # Permission is hereby granted, free of charge, to any person obtaining a @@ -20,21 +23,40 @@ # DEALINGS IN THE SOFTWARE. # -ifeq ($(LOCAL_MODULE_CLASS),) -LOCAL_MODULE_CLASS := SHARED_LIBRARIES -endif +# --- +# Build libmesa_intel_compiler +# --- + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_intel_compiler +LOCAL_MODULE_CLASS := STATIC_LIBRARIES + +LOCAL_SRC_FILES := \ +$(COMPILER_FILES) + +LOCAL_C_INCLUDES := \ +$(MESA_TOP)/src/intel \ +$(MESA_TOP)/src/compiler/nir \ +$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir + +LOCAL_SHARED_LIBRARIES := \ +libdrm_intel brw_nir_trig_workarounds_deps := \ -$(LOCAL_PATH)/brw_nir_trig_workarounds.py \ +$(LOCAL_PATH)/compiler/brw_nir_trig_workarounds.py \ $(MESA_TOP)/src/compiler/nir/nir_algebraic.py intermediates := $(call local-generated-sources-dir) -$(intermediates)/brw_nir_trig_workarounds.c: $(brw_nir_trig_workarounds_deps) +$(intermediates)/compiler/brw_nir_trig_workarounds.c: $(brw_nir_trig_workarounds_deps) @mkdir -p $(dir $@) $(hide) PYTHONPATH=$(MESA_TOP)/src/compiler/nir $(MESA_PYTHON2) $< > $@ LOCAL_STATIC_LIBRARIES = libmesa_genxml LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ -$(i965_compiler_GENERATED_FILES)) 
+$(COMPILER_GENERATED_FILES)) + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Android.mk b/src/intel/Android.mk index 7cb2bb9f8d..db6c770f35 100644 --- a/src/intel/Android.mk +++ b/src/intel/Android.mk @@ -27,6 +27,7 @@ include $(LOCAL_PATH)/Makefile.sources include $(LOCAL_PATH)/Android.blorp.mk include $(LOCAL_PATH)/Android.common.mk +include $(LOCAL_PATH)/Android.compiler.mk include $(LOCAL_PATH)/Android.genxml.mk include $(LOCAL_PATH)/Android.isl.mk include $(LOCAL_PATH)/Android.vulkan.mk diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 7dea3c2507..708851c866 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -152,32 +152,6 @@ include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) # --- -# Build libmesa_i965_compiler -# --- - -include $(CLEAR_VARS) - -LOCAL_MODULE := libmesa_i965_compiler -LOCAL_MODULE_CLASS := STATIC_LIBRARIES - -LOCAL_SRC_FILES := \ -$(i965_compiler_FILES) - -LOCAL_C_INCLUDES := \ -$(MESA_DRI_C_INCLUDES) \ -$(MESA_TOP)/src/intel \ -$(MESA_TOP)/src/compiler/nir \ -$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir \ -$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,)/glsl - -LOCAL_SHARED_LIBRARIES := \ -libdrm_intel - -include $(LOCAL_PATH)/Android.gen.mk -include $(MESA_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) - -# --- # Build i965_d
Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler
On 03/03/2017 01:21 PM, Tapani Pälli wrote: On 03/02/2017 03:41 PM, Emil Velikov wrote: Cc: Mauro Rossi Cc: Tapani Pälli Cc: Jason Ekstrand --- All, here is a 5min attempt to fix the Android build. Tapani, Mauro do give it a test since I've done an educated guess here. Pull Jason's branch and apply on top. https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/move-compiler when compiling blorp and isl it says: fatal error: 'brw_compiler.h' file not found this can be fixed by putting 'src/intel/compiler' include path for blorp and isl libraries: https://github.com/tpalli/external-mesa/commit/4ffef80a45bd93731b2d2af0cb532687f11ae8d3 but when linking i965_dri library then there is quite a big explosion of missing symbols, I can take a look at this but a bit later. doh sorry, it looks like I did not apply your patch after all in that tree ... ignore the noise, will try again --- .../Android.gen.mk => intel/Android.compiler.mk} | 34 ++ src/intel/Android.mk | 1 + src/mesa/drivers/dri/i965/Android.mk | 30 ++- 3 files changed, 31 insertions(+), 34 deletions(-) rename src/{mesa/drivers/dri/i965/Android.gen.mk => intel/Android.compiler.mk} (64%) diff --git a/src/mesa/drivers/dri/i965/Android.gen.mk b/src/intel/Android.compiler.mk similarity index 64% rename from src/mesa/drivers/dri/i965/Android.gen.mk rename to src/intel/Android.compiler.mk index c2b8dae339..2d4ba91396 100644 --- a/src/mesa/drivers/dri/i965/Android.gen.mk +++ b/src/intel/Android.compiler.mk @@ -1,4 +1,7 @@ # +# Copyright (C) 2011 Intel Corporation +# Copyright (C) 2010-2011 Chia-I Wu +# Copyright (C) 2010-2011 LunarG # Copyright (C) 2016 Linaro, Ltd., Rob Herring # # Permission is hereby granted, free of charge, to any person obtaining a @@ -20,21 +23,40 @@ # DEALINGS IN THE SOFTWARE. 
# -ifeq ($(LOCAL_MODULE_CLASS),) -LOCAL_MODULE_CLASS := SHARED_LIBRARIES -endif +# --- +# Build libmesa_intel_compiler +# --- + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_intel_compiler +LOCAL_MODULE_CLASS := STATIC_LIBRARIES + +LOCAL_SRC_FILES := \ +$(COMPILER_FILES) + +LOCAL_C_INCLUDES := \ +$(MESA_TOP)/src/intel \ +$(MESA_TOP)/src/compiler/nir \ +$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir + +LOCAL_SHARED_LIBRARIES := \ +libdrm_intel brw_nir_trig_workarounds_deps := \ -$(LOCAL_PATH)/brw_nir_trig_workarounds.py \ +$(LOCAL_PATH)/compiler/brw_nir_trig_workarounds.py \ $(MESA_TOP)/src/compiler/nir/nir_algebraic.py intermediates := $(call local-generated-sources-dir) -$(intermediates)/brw_nir_trig_workarounds.c: $(brw_nir_trig_workarounds_deps) +$(intermediates)/compiler/brw_nir_trig_workarounds.c: $(brw_nir_trig_workarounds_deps) @mkdir -p $(dir $@) $(hide) PYTHONPATH=$(MESA_TOP)/src/compiler/nir $(MESA_PYTHON2) $< > $@ LOCAL_STATIC_LIBRARIES = libmesa_genxml LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ -$(i965_compiler_GENERATED_FILES)) +$(COMPILER_GENERATED_FILES)) + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Android.mk b/src/intel/Android.mk index 7cb2bb9f8d..db6c770f35 100644 --- a/src/intel/Android.mk +++ b/src/intel/Android.mk @@ -27,6 +27,7 @@ include $(LOCAL_PATH)/Makefile.sources include $(LOCAL_PATH)/Android.blorp.mk include $(LOCAL_PATH)/Android.common.mk +include $(LOCAL_PATH)/Android.compiler.mk include $(LOCAL_PATH)/Android.genxml.mk include $(LOCAL_PATH)/Android.isl.mk include $(LOCAL_PATH)/Android.vulkan.mk diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 7dea3c2507..708851c866 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -152,32 +152,6 @@ include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) # --- -# Build libmesa_i965_compiler -# --- - -include $(CLEAR_VARS) - 
-LOCAL_MODULE := libmesa_i965_compiler -LOCAL_MODULE_CLASS := STATIC_LIBRARIES - -LOCAL_SRC_FILES := \ -$(i965_compiler_FILES) - -LOCAL_C_INCLUDES := \ -$(MESA_DRI_C_INCLUDES) \ -$(MESA_TOP)/src/intel \ -$(MESA_TOP)/src/compiler/nir \ -$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir \ -$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,)/glsl - -LOCAL_SHARED_LIBRARIES := \ -libdrm_intel - -include $(LOCAL_PATH)/Android.gen.mk -include $(MESA_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) - -# --- # Build i965_dri # --- @@ -209,9 +183,9 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ $(MESA_DRI_WHOLE_STATIC_LIBRARIES) \ $(I965_PERGEN_LIBS) \ libmesa_intel_common \ -libmesa_blorp \ libmesa_isl \ -libmesa_i965_compiler +libmesa_
Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler
On 03/02/2017 03:41 PM, Emil Velikov wrote: Cc: Mauro Rossi Cc: Tapani Pälli Cc: Jason Ekstrand --- All, here is a 5min attempt to fix the Android build. Tapani, Mauro do give it a test since I've done an educated guess here. Pull Jason's branch and apply on top. https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/move-compiler when compiling blorp and isl it says: fatal error: 'brw_compiler.h' file not found this can be fixed by putting 'src/intel/compiler' include path for blorp and isl libraries: https://github.com/tpalli/external-mesa/commit/4ffef80a45bd93731b2d2af0cb532687f11ae8d3 but when linking i965_dri library then there is quite a big explosion of missing symbols, I can take a look at this but a bit later. --- .../Android.gen.mk => intel/Android.compiler.mk} | 34 ++ src/intel/Android.mk | 1 + src/mesa/drivers/dri/i965/Android.mk | 30 ++- 3 files changed, 31 insertions(+), 34 deletions(-) rename src/{mesa/drivers/dri/i965/Android.gen.mk => intel/Android.compiler.mk} (64%) diff --git a/src/mesa/drivers/dri/i965/Android.gen.mk b/src/intel/Android.compiler.mk similarity index 64% rename from src/mesa/drivers/dri/i965/Android.gen.mk rename to src/intel/Android.compiler.mk index c2b8dae339..2d4ba91396 100644 --- a/src/mesa/drivers/dri/i965/Android.gen.mk +++ b/src/intel/Android.compiler.mk @@ -1,4 +1,7 @@ # +# Copyright (C) 2011 Intel Corporation +# Copyright (C) 2010-2011 Chia-I Wu +# Copyright (C) 2010-2011 LunarG # Copyright (C) 2016 Linaro, Ltd., Rob Herring # # Permission is hereby granted, free of charge, to any person obtaining a @@ -20,21 +23,40 @@ # DEALINGS IN THE SOFTWARE. 
# -ifeq ($(LOCAL_MODULE_CLASS),) -LOCAL_MODULE_CLASS := SHARED_LIBRARIES -endif +# --- +# Build libmesa_intel_compiler +# --- + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_intel_compiler +LOCAL_MODULE_CLASS := STATIC_LIBRARIES + +LOCAL_SRC_FILES := \ + $(COMPILER_FILES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/intel \ + $(MESA_TOP)/src/compiler/nir \ + $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir + +LOCAL_SHARED_LIBRARIES := \ + libdrm_intel brw_nir_trig_workarounds_deps := \ - $(LOCAL_PATH)/brw_nir_trig_workarounds.py \ + $(LOCAL_PATH)/compiler/brw_nir_trig_workarounds.py \ $(MESA_TOP)/src/compiler/nir/nir_algebraic.py intermediates := $(call local-generated-sources-dir) -$(intermediates)/brw_nir_trig_workarounds.c: $(brw_nir_trig_workarounds_deps) +$(intermediates)/compiler/brw_nir_trig_workarounds.c: $(brw_nir_trig_workarounds_deps) @mkdir -p $(dir $@) $(hide) PYTHONPATH=$(MESA_TOP)/src/compiler/nir $(MESA_PYTHON2) $< > $@ LOCAL_STATIC_LIBRARIES = libmesa_genxml LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ - $(i965_compiler_GENERATED_FILES)) + $(COMPILER_GENERATED_FILES)) + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Android.mk b/src/intel/Android.mk index 7cb2bb9f8d..db6c770f35 100644 --- a/src/intel/Android.mk +++ b/src/intel/Android.mk @@ -27,6 +27,7 @@ include $(LOCAL_PATH)/Makefile.sources include $(LOCAL_PATH)/Android.blorp.mk include $(LOCAL_PATH)/Android.common.mk +include $(LOCAL_PATH)/Android.compiler.mk include $(LOCAL_PATH)/Android.genxml.mk include $(LOCAL_PATH)/Android.isl.mk include $(LOCAL_PATH)/Android.vulkan.mk diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 7dea3c2507..708851c866 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -152,32 +152,6 @@ include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) # --- -# Build libmesa_i965_compiler -# --- - -include 
$(CLEAR_VARS) - -LOCAL_MODULE := libmesa_i965_compiler -LOCAL_MODULE_CLASS := STATIC_LIBRARIES - -LOCAL_SRC_FILES := \ - $(i965_compiler_FILES) - -LOCAL_C_INCLUDES := \ - $(MESA_DRI_C_INCLUDES) \ - $(MESA_TOP)/src/intel \ - $(MESA_TOP)/src/compiler/nir \ - $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir \ - $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,)/glsl - -LOCAL_SHARED_LIBRARIES := \ - libdrm_intel - -include $(LOCAL_PATH)/Android.gen.mk -include $(MESA_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) - -# --- # Build i965_dri # --- @@ -209,9 +183,9 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ $(MESA_DRI_WHOLE_STATIC_LIBRARIES) \ $(I965_PERGEN_LIBS) \ libmesa_intel_common \ - libmesa_blorp \ libmesa_isl \ - libmesa_i965_compiler + libmesa_blorp \ + libmesa_intel_compiler LOCAL_SHARED_LIBRARIES := \ $(M
[Mesa-dev] [PATCH] android: fix outdir for gen_enum_to_str files
when files are being generated the value of $intermediates var content can be completely random, this makes sure that outdir is the wanted one. Fixes: 3f2cb699 ("android: vulkan: add support for libmesa_vulkan_util") Signed-off-by: Tapani Pälli --- src/vulkan/Android.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vulkan/Android.mk b/src/vulkan/Android.mk index 9f71d8f..7653f34 100644 --- a/src/vulkan/Android.mk +++ b/src/vulkan/Android.mk @@ -1,4 +1,5 @@ # Copyright © 2017 Mauro Rossi +# Copyright © 2017 Intel Corporation # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -45,7 +46,7 @@ vulkan_api_xml = $(MESA_TOP)/src/vulkan/registry/vk.xml $(LOCAL_GENERATED_SOURCES): $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py $(vulkan_api_xml) @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py --xml $(vulkan_api_xml) --outdir $(intermediates)/util + $(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py --xml $(vulkan_api_xml) --outdir $(dir $@) LOCAL_EXPORT_C_INCLUDE_DIRS := \ $(intermediates) -- 2.9.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] configure.ac: require pthread-stubs only where available
On Thursday, 2017-03-02 19:02:44 +0000, Emil Velikov wrote: > From: Emil Velikov > > The project is a thing only for BSD platforms. Or in other words - for > any other platforms building/installing pthread-stubs results only in a > pthread-stub.pc file. > > And even where it provides a DSO, there's a fundamental design issue > with it - see the pthread-stubs mailing list for the specifics. > > Cc: Jeremy Huddleston Sequoia > CC: Gary Wong > Cc: Randy Fishel > Cc: Niveditha Rau > Signed-off-by: Emil Velikov This patch has been on ArchLinux for a while, and is therefore Tested-by: Eric Engestrom (as well as thousands of other people) > --- > Jeremy, others, > > Afaict pthread-stubs expands to a simple .pc on your platforms, but a > confirmation will be greatly appreciated. > --- > configure.ac | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/configure.ac b/configure.ac > index a3d1a00bdd..e94e46a0b8 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -799,7 +799,7 @@ fi > > dnl pthread-stubs is mandatory on targets where it exists > case "$host_os" in > -cygwin* ) > +linux* | cygwin* | darwin* | solaris* | gnu*) > pthread_stubs_possible="no" > ;; > * ) > -- > 2.11.1 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Any updates on threaded GL dispatch?
On 02/03/17 22:18, Marek Olšák wrote: The bad news is my involvement is currently on hold due to other projects and responsibilities. I can probably spend some time on this. Seems like Gregory has taken care of most of the problems and it just needs someone to push it over the line. Tim The good news is I'm testing some ideas on my new threaded Gallium dispatch. If I can make that improve perf on all CPU-bound apps, I can do the same with glthread. Marek On Thu, Mar 2, 2017 at 3:54 AM, Dieter Nützel wrote: Hello Gregory and Marek, are there any updates on threaded GL dispatch? I mean this: [-] As a quick summary: * there are now only 2 minor failures on piglit with my latest patches (sent to Marek) * I have a pending patch to allow asynchronous PBO transfer * Now that piglit is crash free I will give a try to both glxgear and glmark. Hopefully they will be both good. Gregory And this: The number of tests run doesn't necessarily correspond to the amount of test coverage. 10 tests doing different things can be more useful than 10000 tests doing the same thing. Marek Fair point. As a side note, I tested both glxgear and glmark2 which are now crash-free :) Where can I grep 'latest' code with these patches? GREAT success! -Dieter ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 99987] Mesa 13+ breaks Xvnc (and similar X servers)
https://bugs.freedesktop.org/show_bug.cgi?id=99987 --- Comment #5 from Pierre Ossman --- The analysis on bug 99027 seems to be only about why it is crashing, and not why Mesa has changed its requirements on the X server. There is some talk about 8 bit depth, but the issue occurs on standard bit depths as well. Digging further is also on my todo list, but unfortunately not near the top. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] EGL/Android: Add EGL_EXT_buffer_age extension
As discussed offline, this passes all dEQP tests for the extension on Android-IA (let's add this to commit message when pushing). Reviewed-by: Tapani Pälli On 02/08/2017 04:46 AM, Xiaosong Wei wrote: This patch implements the EGL_EXT_buffer_age extension for Android. https://www.khronos.org/registry/EGL/extensions/EXT/EGL_EXT_buffer_age.txt --- src/egl/drivers/dri2/egl_dri2.h | 9 ++ src/egl/drivers/dri2/platform_android.c | 51 - 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index f3d09dc..992e5b3 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -295,6 +295,15 @@ struct dri2_egl_surface /* EGL-owned buffers */ __DRIbuffer *local_buffers[__DRI_BUFFER_COUNT]; + + /* Used to record all the buffers created by ANativeWindow and their ages. +* Usually Android uses at most triple buffers in ANativeWindow +* so hardcode the number of color_buffers to 3. +*/ + struct { + struct ANativeWindowBuffer *buffer; + int age; + } color_buffers[3], *back; #endif #if defined(HAVE_SURFACELESS_PLATFORM) diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index 79fe81a..109b419 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -165,6 +165,33 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf) dri2_surf->window->lockBuffer(dri2_surf->window, dri2_surf->buffer); #endif + /* Record all the buffers created by ANativeWindow and update back buffer +* for updating buffer's age in swap_buffers. 
+*/ + EGLBoolean updated = EGL_FALSE; + for (int i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) { + if (!dri2_surf->color_buffers[i].buffer) { + dri2_surf->color_buffers[i].buffer = dri2_surf->buffer; + } + if (dri2_surf->color_buffers[i].buffer == dri2_surf->buffer) { + dri2_surf->back = &dri2_surf->color_buffers[i]; + updated = EGL_TRUE; + break; + } + } + + if (!updated) { + /* In case of all the buffers were recreated by ANativeWindow, reset + * the color_buffers + */ + for (int i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) { + dri2_surf->color_buffers[i].buffer = NULL; + dri2_surf->color_buffers[i].age = 0; + } + dri2_surf->color_buffers[0].buffer = dri2_surf->buffer; + dri2_surf->back = &dri2_surf->color_buffers[0]; + } + return EGL_TRUE; } @@ -201,6 +228,7 @@ droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_sur dri2_surf->buffer->common.decRef(&dri2_surf->buffer->common); dri2_surf->buffer = NULL; + dri2_surf->back = NULL; mtx_lock(&disp->Mutex); @@ -547,6 +575,20 @@ droid_image_get_buffers(__DRIdrawable *driDrawable, return 1; } +static EGLint +droid_query_buffer_age(_EGLDriver *drv, + _EGLDisplay *disp, _EGLSurface *surface) +{ + struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface); + + if (update_buffers(dri2_surf) < 0) { + _eglError(EGL_BAD_ALLOC, "droid_query_buffer_age"); + return 0; + } + + return dri2_surf->back->age; +} + static EGLBoolean droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) { @@ -556,6 +598,12 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) if (dri2_surf->base.Type != EGL_WINDOW_BIT) return EGL_TRUE; + for (int i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) { + if (dri2_surf->color_buffers[i].age > 0) + dri2_surf->color_buffers[i].age++; + } + dri2_surf->back->age = 1; + dri2_flush_drawable_for_swapbuffers(disp, draw); if (dri2_surf->buffer) @@ -989,7 +1037,7 @@ static struct dri2_egl_display_vtbl droid_display_vtbl = { 
.swap_buffers_region = dri2_fallback_swap_buffers_region, .post_sub_buffer = dri2_fallback_post_sub_buffer, .copy_buffers = dri2_fallback_copy_buffers, - .query_buffer_age = dri2_fallback_query_buffer_age, + .query_buffer_age = droid_query_buffer_age, .query_surface = droid_query_surface, .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image, .get_sync_values = dri2_fallback_get_sync_values, @@ -1080,6 +1128,7 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy) dpy->Extensions.ANDROID_framebuffer_target = EGL_TRUE; dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE; dpy->Extensions.ANDROID_recordable = EGL_TRUE; + dpy->Extensions.EXT_buffer_age = EGL_TRUE; /* Fill vtbl last to prevent accidentally calling virtual function during * initialization. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev