[Mesa-dev] [PATCH v2 20/20] ac: rework ac_llvm_extract_elem()

2017-12-12 Thread Timothy Arceri
Simplifies the logic a little and asserts that index is 0 when the value is not a vector.

Suggested-by: Nicolai Hähnle 
---
 src/amd/common/ac_llvm_build.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index faa08b6301c..a31f225e177 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -106,24 +106,24 @@ ac_get_llvm_num_components(LLVMValueRef value)
  ? LLVMGetVectorSize(type)
  : 1;
return num_components;
 }
 
 LLVMValueRef
 ac_llvm_extract_elem(struct ac_llvm_context *ac,
 LLVMValueRef value,
 int index)
 {
-   int count = ac_get_llvm_num_components(value);
-
-   if (count == 1)
+   if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
+   assert(index == 0);
return value;
+   }
 
return LLVMBuildExtractElement(ac->builder, value,
   LLVMConstInt(ac->i32, index, false), "");
 }
 
 unsigned
 ac_get_type_size(LLVMTypeRef type)
 {
LLVMTypeKind kind = LLVMGetTypeKind(type);
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 14/20] ac/radeonsi: add load_tess_coord() to the abi

2017-12-12 Thread Timothy Arceri
Reviewed-by: Nicolai Hähnle 
---
 src/amd/common/ac_nir_to_llvm.c  | 20 +--
 src/amd/common/ac_shader_abi.h   |  4 +++
 src/gallium/drivers/radeonsi/si_shader.c | 42 +++-
 3 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index df761782b64..e17b5ef888c 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4024,37 +4024,38 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned 
stream, LLVMValueRef *addr
 }
 
 static void
 visit_end_primitive(struct nir_to_llvm_context *ctx,
const nir_intrinsic_instr *instr)
 {
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (0 << 
8), ctx->gs_wave_id);
 }
 
 static LLVMValueRef
-visit_load_tess_coord(struct nir_to_llvm_context *ctx,
- const nir_intrinsic_instr *instr)
+load_tess_coord(struct ac_shader_abi *abi, LLVMTypeRef type,
+   unsigned num_components)
 {
+   struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
+
LLVMValueRef coord[4] = {
ctx->tes_u,
ctx->tes_v,
ctx->ac.f32_0,
ctx->ac.f32_0,
};
 
if (ctx->tes_primitive_mode == GL_TRIANGLES)
coord[2] = LLVMBuildFSub(ctx->builder, ctx->ac.f32_1,
LLVMBuildFAdd(ctx->builder, coord[0], 
coord[1], ""), "");
 
-   LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, 
instr->num_components);
-   return LLVMBuildBitCast(ctx->builder, result,
-   get_def_type(ctx->nir, &instr->dest.ssa), "");
+   LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, 
num_components);
+   return LLVMBuildBitCast(ctx->builder, result, type, "");
 }
 
 static void visit_intrinsic(struct ac_nir_context *ctx,
 nir_intrinsic_instr *instr)
 {
LLVMValueRef result = NULL;
 
switch (instr->intrinsic) {
case nir_intrinsic_load_work_group_id: {
result = ctx->nctx->workgroup_ids;
@@ -4228,23 +4229,27 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
case nir_intrinsic_interp_var_at_offset:
result = visit_interp(ctx->nctx, instr);
break;
case nir_intrinsic_emit_vertex:
assert(instr->const_index[0] == 0);
ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs);
break;
case nir_intrinsic_end_primitive:
visit_end_primitive(ctx->nctx, instr);
break;
-   case nir_intrinsic_load_tess_coord:
-   result = visit_load_tess_coord(ctx->nctx, instr);
+   case nir_intrinsic_load_tess_coord: {
+   LLVMTypeRef type = ctx->nctx ?
+   get_def_type(ctx->nctx->nir, &instr->dest.ssa) :
+   NULL;
+   result = ctx->abi->load_tess_coord(ctx->abi, type, 
instr->num_components);
break;
+   }
case nir_intrinsic_load_patch_vertices_in:
result = LLVMConstInt(ctx->ac.i32, 
ctx->nctx->options->key.tcs.input_vertices, false);
break;
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
fprintf(stderr, "\n");
break;
}
if (result) {
@@ -6592,20 +6597,21 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.gs_max_out_vertices = 
shaders[i]->info.gs.vertices_out;
ctx.abi.load_inputs = load_gs_input;
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
ctx.tcs_patch_outputs_read = 
shaders[i]->info.patch_outputs_read;
ctx.abi.load_tess_inputs = load_tcs_input;
ctx.abi.store_tcs_outputs = store_tcs_output;
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
ctx.tes_primitive_mode = 
shaders[i]->info.tess.primitive_mode;
ctx.abi.load_tess_inputs = load_tes_input;
+   ctx.abi.load_tess_coord = load_tess_coord;
} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
if (shader_info->info.vs.needs_instance_id) {
ctx.shader_info->vs.vgpr_comp_cnt =
MAX2(3, 
ctx.shader_info->vs.vgpr_comp_cnt);
}
} else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
shader_info->fs.can_discard = 
shaders[i]->info.fs.uses_discard;
}
 
if (i)
diff 

[Mesa-dev] [PATCH v2 16/20] gallium/tgsi: add patch support to tgsi_get_gl_varying_semantic()

2017-12-12 Thread Timothy Arceri
Reviewed-by: Nicolai Hähnle 
---
 src/gallium/auxiliary/tgsi/tgsi_from_mesa.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_from_mesa.c 
b/src/gallium/auxiliary/tgsi/tgsi_from_mesa.c
index c014115918e..659156b5190 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_from_mesa.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_from_mesa.c
@@ -147,23 +147,28 @@ tgsi_get_gl_varying_semantic(gl_varying_slot attr,
   if (needs_texcoord_semantic) {
  *semantic_name = TGSI_SEMANTIC_TEXCOORD;
  *semantic_index = attr - VARYING_SLOT_TEX0;
  break;
   }
   /* fall through */
case VARYING_SLOT_VAR0:
default:
   assert(attr >= VARYING_SLOT_VAR0 ||
  (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
-  *semantic_name = TGSI_SEMANTIC_GENERIC;
-  *semantic_index =
- tgsi_get_generic_gl_varying_index(attr, needs_texcoord_semantic);
+  if (attr >= VARYING_SLOT_PATCH0) {
+ *semantic_name = TGSI_SEMANTIC_PATCH;
+ *semantic_index = attr - VARYING_SLOT_PATCH0;
+  } else {
+ *semantic_name = TGSI_SEMANTIC_GENERIC;
+ *semantic_index =
+tgsi_get_generic_gl_varying_index(attr, needs_texcoord_semantic);
+  }
   break;
}
 }
 
 /**
  * Determine the semantic name and index used for the given fragment shader
  * result.
  */
 void
 tgsi_get_gl_frag_result_semantic(gl_frag_result frag_result,
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 18/20] radeonsi: add si_load_tess_level() helper

2017-12-12 Thread Timothy Arceri
This will be shared by the TGSI and NIR backends.

Reviewed-by: Nicolai Hähnle 
---
 src/gallium/drivers/radeonsi/si_shader.c | 31 ++-
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 7f874d06db6..17e55d7fc82 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1914,20 +1914,36 @@ static LLVMValueRef si_load_tess_coord(struct 
ac_shader_abi *abi,
 
/* For triangles, the vector should be (u, v, 1-u-v). */
if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] 
==
PIPE_PRIM_TRIANGLES)
coord[2] = lp_build_sub(bld, ctx->ac.f32_1,
lp_build_add(bld, coord[0], coord[1]));
 
return lp_build_gather_values(&ctx->gallivm, coord, 4);
 }
 
+static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, int param)
+{
+   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+   LLVMValueRef buffer, base, addr;
+
+   buffer = desc_from_addr_base64k(ctx, 
ctx->param_tcs_offchip_addr_base64k);
+
+   base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
+   addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
+ LLVMConstInt(ctx->i32, param, 0));
+
+   return buffer_load(&ctx->bld_base, ctx->f32,
+  ~0, buffer, base, addr, true);
+
+}
+
 void si_load_system_value(struct si_shader_context *ctx,
  unsigned index,
  const struct tgsi_full_declaration *decl)
 {
LLVMValueRef value = 0;
 
assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
 
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_INSTANCEID:
@@ -2031,34 +2047,23 @@ void si_load_system_value(struct si_shader_context *ctx,
case TGSI_SEMANTIC_VERTICESIN:
if (ctx->type == PIPE_SHADER_TESS_CTRL)
value = unpack_param(ctx, 
ctx->param_tcs_out_lds_layout, 26, 6);
else if (ctx->type == PIPE_SHADER_TESS_EVAL)
value = get_num_tcs_out_vertices(ctx);
else
assert(!"invalid shader stage for 
TGSI_SEMANTIC_VERTICESIN");
break;
 
case TGSI_SEMANTIC_TESSINNER:
-   case TGSI_SEMANTIC_TESSOUTER:
-   {
-   LLVMValueRef buffer, base, addr;
+   case TGSI_SEMANTIC_TESSOUTER: {
int param = 
si_shader_io_get_unique_index_patch(decl->Semantic.Name, 0);
-
-   buffer = desc_from_addr_base64k(ctx, 
ctx->param_tcs_offchip_addr_base64k);
-
-   base = LLVMGetParam(ctx->main_fn, 
ctx->param_tcs_offchip_offset);
-   addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), 
NULL,
- LLVMConstInt(ctx->i32, param, 0));
-
-   value = buffer_load(&ctx->bld_base, ctx->f32,
-   ~0, buffer, base, addr, true);
-
+   value = si_load_tess_level(&ctx->abi, param);
break;
}
 
case TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI:
case TGSI_SEMANTIC_DEFAULT_TESSINNER_SI:
{
LLVMValueRef buf, slot, val[4];
int i, offset;
 
slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 
0);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 15/20] radeonsi: add dummy implementation of si_nir_scan_tess_ctrl()

2017-12-12 Thread Timothy Arceri
Reviewed-by: Nicolai Hähnle 
---
 src/gallium/drivers/radeonsi/si_shader.h|  3 +++
 src/gallium/drivers/radeonsi/si_shader_nir.c| 19 +++
 src/gallium/drivers/radeonsi/si_state_shaders.c |  1 +
 3 files changed, 23 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index bcb5c9da4ce..87aa6d416f2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -647,20 +647,23 @@ void si_shader_apply_scratch_relocs(struct si_shader 
*shader,
 void si_shader_binary_read_config(struct ac_shader_binary *binary,
  struct si_shader_config *conf,
  unsigned symbol_offset);
 unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
bool writes_samplemask);
 const char *si_get_shader_name(const struct si_shader *shader, unsigned 
processor);
 
 /* si_shader_nir.c */
 void si_nir_scan_shader(const struct nir_shader *nir,
struct tgsi_shader_info *info);
+void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
+  const struct tgsi_shader_info *info,
+  struct tgsi_tessctrl_info *out);
 void si_lower_nir(struct si_shader_selector *sel);
 
 /* Inline helpers. */
 
 /* Return the pointer to the main shader part's pointer. */
 static inline struct si_shader **
 si_get_main_shader_part(struct si_shader_selector *sel,
struct si_shader_key *key)
 {
if (key->as_ls)
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index d2760b03bca..f96bf7c2d2f 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -123,20 +123,39 @@ static void scan_instruction(struct tgsi_shader_info 
*info,
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_ssbo_atomic_comp_swap:
info->writes_memory = true;
break;
default:
break;
}
}
 }
 
+void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
+  const struct tgsi_shader_info *info,
+  struct tgsi_tessctrl_info *out)
+{
+   memset(out, 0, sizeof(*out));
+
+   if (nir->info.stage != MESA_SHADER_TESS_CTRL)
+   return;
+
+   /* Initial value = true. Here the pass will accumulate results from
+* multiple segments surrounded by barriers. If tess factors aren't
+* written at all, it's a shader bug and we don't care if this will be
+* true.
+*/
+   out->tessfactors_are_def_in_all_invocs = true;
+
+   /* TODO: Implement scanning of tess factors, see tgsi backend. */
+}
+
 void si_nir_scan_shader(const struct nir_shader *nir,
struct tgsi_shader_info *info)
 {
nir_function *func;
unsigned i;
 
assert(nir->info.stage == MESA_SHADER_VERTEX ||
   nir->info.stage == MESA_SHADER_GEOMETRY ||
   nir->info.stage == MESA_SHADER_FRAGMENT);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4f683b85144..f70af15e113 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2052,20 +2052,21 @@ static void *si_create_shader_selector(struct 
pipe_context *ctx,
}
 
tgsi_scan_shader(state->tokens, &sel->info);
tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info);
} else {
assert(state->type == PIPE_SHADER_IR_NIR);
 
sel->nir = state->ir.nir;
 
si_nir_scan_shader(sel->nir, &sel->info);
+   si_nir_scan_tess_ctrl(sel->nir, &sel->info, &sel->tcs_info);
 
si_lower_nir(sel);
}
 
sel->type = sel->info.processor;
p_atomic_inc(&sscreen->num_shaders_created);
si_get_active_slot_masks(&sel->info,
 &sel->active_const_and_shader_buffers,
 &sel->active_samplers_and_images);
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 13/20] radeonsi: make si_llvm_emit_tcs_epilogue compatible with emit_outputs abi

2017-12-12 Thread Timothy Arceri
Reviewed-by: Nicolai Hähnle 
---
 src/gallium/drivers/radeonsi/si_shader.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4823da7a763..04dae8edd13 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3237,23 +3237,26 @@ si_insert_input_ptr_as_2xi32(struct si_shader_context 
*ctx, LLVMValueRef ret,
ptr = LLVMGetParam(ctx->main_fn, param);
ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, "");
ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, "");
lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, "");
hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, "");
ret = LLVMBuildInsertValue(builder, ret, lo, return_index, "");
return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, "");
 }
 
 /* This only writes the tessellation factor levels. */
-static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
+static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs)
 {
-   struct si_shader_context *ctx = si_shader_context(bld_base);
+   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+   struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
 
si_copy_tcs_inputs(bld_base);
 
rel_patch_id = get_rel_patch_id(ctx);
invocation_id = unpack_llvm_param(ctx, ctx->abi.tcs_rel_ids, 8, 5);
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
 
if (ctx->screen->info.chip_class >= GFX9) {
@@ -6042,21 +6045,22 @@ static bool si_compile_tgsi_main(struct 
si_shader_context *ctx,
else
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_TESS_CTRL:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
ctx->abi.load_tess_inputs = si_nir_load_input_tcs;
bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
bld_base->emit_store = store_output_tcs;
ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
-   bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
+   ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
+   bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_TESS_EVAL:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
ctx->abi.load_tess_inputs = si_nir_load_input_tes;
if (shader->key.as_es)
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
else
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 17/20] st/glsl_to_nir/radeonsi: enable tessellation shaders

2017-12-12 Thread Timothy Arceri
Reviewed-by: Nicolai Hähnle 
---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 2 ++
 src/mesa/state_tracker/st_glsl_to_nir.cpp| 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index f96bf7c2d2f..5ac020d9fc4 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -150,20 +150,22 @@ void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
 }
 
 void si_nir_scan_shader(const struct nir_shader *nir,
struct tgsi_shader_info *info)
 {
nir_function *func;
unsigned i;
 
assert(nir->info.stage == MESA_SHADER_VERTEX ||
   nir->info.stage == MESA_SHADER_GEOMETRY ||
+  nir->info.stage == MESA_SHADER_TESS_CTRL ||
+  nir->info.stage == MESA_SHADER_TESS_EVAL ||
   nir->info.stage == MESA_SHADER_FRAGMENT);
 
info->processor = pipe_shader_type_from_mesa(nir->info.stage);
info->num_tokens = 2; /* indicate that the shader is non-empty */
info->num_instructions = 2;
 
if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT] =
nir->info.tess.tcs_vertices_out;
}
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 7357eebae0c..70c5daaa225 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -649,21 +649,23 @@ st_finalize_nir(struct st_context *st, struct gl_program 
*prog,
   /* Needs special handling so drvloc matches the vbo state: */
   st_nir_assign_vs_in_locations(prog, nir);
   /* Re-lower global vars, to deal with any dead VS inputs. */
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
 
   sort_varyings(&nir->outputs);
   st_nir_assign_var_locations(&nir->outputs,
   &nir->num_outputs,
   nir->info.stage);
   st_nir_fixup_varying_slots(st, &nir->outputs);
-   } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+   } else if (nir->info.stage == MESA_SHADER_GEOMETRY ||
+  nir->info.stage == MESA_SHADER_TESS_CTRL ||
+  nir->info.stage == MESA_SHADER_TESS_EVAL) {
   sort_varyings(&nir->inputs);
   st_nir_assign_var_locations(&nir->inputs,
   &nir->num_inputs,
   nir->info.stage);
   st_nir_fixup_varying_slots(st, &nir->inputs);
 
   sort_varyings(&nir->outputs);
   st_nir_assign_var_locations(&nir->outputs,
   &nir->num_outputs,
   nir->info.stage);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 19/20] ac: add load_tess_level() to the abi

2017-12-12 Thread Timothy Arceri
Fixes the following piglit tests in radeonsi:

vs-tcs-tes-tessinner-tessouter-inputs-quads.shader_test
vs-tcs-tes-tessinner-tessouter-inputs-tris.shader_test
vs-tes-tessinner-tessouter-inputs-quads.shader_test
vs-tes-tessinner-tessouter-inputs-tris.shader_test

Reviewed-by: Nicolai Hähnle 
---
 src/amd/common/ac_nir_to_llvm.c  | 6 ++
 src/amd/common/ac_shader_abi.h   | 4 
 src/gallium/drivers/radeonsi/si_shader.c | 1 +
 3 files changed, 11 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e17b5ef888c..d84fa51c251 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4236,20 +4236,26 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
case nir_intrinsic_end_primitive:
visit_end_primitive(ctx->nctx, instr);
break;
case nir_intrinsic_load_tess_coord: {
LLVMTypeRef type = ctx->nctx ?
get_def_type(ctx->nctx->nir, &instr->dest.ssa) :
NULL;
result = ctx->abi->load_tess_coord(ctx->abi, type, 
instr->num_components);
break;
}
+   case nir_intrinsic_load_tess_level_outer:
+   result = ctx->abi->load_tess_level(ctx->abi, 
shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER));
+   break;
+   case nir_intrinsic_load_tess_level_inner:
+   result = ctx->abi->load_tess_level(ctx->abi, 
shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER));
+   break;
case nir_intrinsic_load_patch_vertices_in:
result = LLVMConstInt(ctx->ac.i32, 
ctx->nctx->options->key.tcs.input_vertices, false);
break;
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
fprintf(stderr, "\n");
break;
}
if (result) {
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 277e4efe47e..992ed52cf73 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -96,20 +96,24 @@ struct ac_shader_abi {
  LLVMValueRef src,
  unsigned component,
  bool is_patch,
  bool is_compact,
  unsigned writemask);
 
LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi,
LLVMTypeRef type,
unsigned num_components);
 
+   LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi,
+   int param);
+
+
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
 
/**
 * Load the descriptor for the given buffer.
 *
 * \param buffer the buffer as presented in NIR: this is the descriptor
 *   in Vulkan, and the buffer index in OpenGL/Gallium
 * \param write whether buffer contents will be written
 */
LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 17e55d7fc82..ae0047177e5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6064,20 +6064,21 @@ static bool si_compile_tgsi_main(struct 
si_shader_context *ctx,
bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
bld_base->emit_store = store_output_tcs;
ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_TESS_EVAL:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
ctx->abi.load_tess_inputs = si_nir_load_input_tes;
ctx->abi.load_tess_coord = si_load_tess_coord;
+   ctx->abi.load_tess_level = si_load_tess_level;
if (shader->key.as_es)
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
else
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_GEOMETRY:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
ctx->abi.load_inputs = si_nir_load_input_gs;
ctx->abi.emit_vertex = si_llvm_emit_vertex;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 12/20] radeonsi/nir: gather tess properties

2017-12-12 Thread Timothy Arceri
Reviewed-by: Nicolai Hähnle 
---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 29 
 1 file changed, 29 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 4138e04dcb5..d2760b03bca 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -76,32 +76,39 @@ static void scan_instruction(struct tgsi_shader_info *info,
} else if (instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 
switch (intr->intrinsic) {
case nir_intrinsic_load_front_face:
info->uses_frontface = 1;
break;
case nir_intrinsic_load_instance_id:
info->uses_instanceid = 1;
break;
+   case nir_intrinsic_load_invocation_id:
+   info->uses_invocationid = true;
+   break;
case nir_intrinsic_load_vertex_id:
info->uses_vertexid = 1;
break;
case nir_intrinsic_load_vertex_id_zero_base:
info->uses_vertexid_nobase = 1;
break;
case nir_intrinsic_load_base_vertex:
info->uses_basevertex = 1;
break;
case nir_intrinsic_load_primitive_id:
info->uses_primid = 1;
break;
+   case nir_intrinsic_load_tess_level_inner:
+   case nir_intrinsic_load_tess_level_outer:
+   info->reads_tess_factors = true;
+   break;
case nir_intrinsic_image_store:
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_min:
case nir_intrinsic_image_atomic_max:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_store_ssbo:
@@ -130,20 +137,42 @@ void si_nir_scan_shader(const struct nir_shader *nir,
unsigned i;
 
assert(nir->info.stage == MESA_SHADER_VERTEX ||
   nir->info.stage == MESA_SHADER_GEOMETRY ||
   nir->info.stage == MESA_SHADER_FRAGMENT);
 
info->processor = pipe_shader_type_from_mesa(nir->info.stage);
info->num_tokens = 2; /* indicate that the shader is non-empty */
info->num_instructions = 2;
 
+   if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
+   info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT] =
+   nir->info.tess.tcs_vertices_out;
+   }
+
+   if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
+   if (nir->info.tess.primitive_mode == GL_ISOLINES)
+   info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = 
GL_LINES;
+   else
+   info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = 
nir->info.tess.primitive_mode;
+
+   STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == 
PIPE_TESS_SPACING_EQUAL);
+   STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
+ PIPE_TESS_SPACING_FRACTIONAL_ODD);
+   STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
+ PIPE_TESS_SPACING_FRACTIONAL_EVEN);
+
+   info->properties[TGSI_PROPERTY_TES_SPACING] = 
(nir->info.tess.spacing + 1) % 3;
+   info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW] = 
!nir->info.tess.ccw;
+   info->properties[TGSI_PROPERTY_TES_POINT_MODE] = 
nir->info.tess.point_mode;
+   }
+
if (nir->info.stage == MESA_SHADER_GEOMETRY) {
info->properties[TGSI_PROPERTY_GS_INPUT_PRIM] = 
nir->info.gs.input_primitive;
info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM] = 
nir->info.gs.output_primitive;
info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] = 
nir->info.gs.vertices_out;
info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = 
nir->info.gs.invocations;
}
 
i = 0;
uint64_t processed_inputs = 0;
unsigned num_inputs = 0;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 10/20] radeonsi: add unpack_llvm_param() helper

2017-12-12 Thread Timothy Arceri
This allows us to pass the LLVM parameter value directly rather than
looking it up by its index in the main function.

Reviewed-by: Nicolai Hähnle 
---
 src/gallium/drivers/radeonsi/si_shader.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index ed9cb8a2261..419f5b6cebb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -233,43 +233,49 @@ unsigned si_shader_io_get_unique_index(unsigned 
semantic_name, unsigned index)
return SI_MAX_IO_GENERIC + 10 + index;
default:
assert(!"invalid semantic name");
return 0;
}
 }
 
 /**
  * Get the value of a shader input parameter and extract a bitfield.
  */
-static LLVMValueRef unpack_param(struct si_shader_context *ctx,
-unsigned param, unsigned rshift,
-unsigned bitwidth)
+static LLVMValueRef unpack_llvm_param(struct si_shader_context *ctx,
+ LLVMValueRef value, unsigned rshift,
+ unsigned bitwidth)
 {
-   LLVMValueRef value = LLVMGetParam(ctx->main_fn,
- param);
-
if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
value = ac_to_integer(&ctx->ac, value);
 
if (rshift)
value = LLVMBuildLShr(ctx->ac.builder, value,
  LLVMConstInt(ctx->i32, rshift, 0), "");
 
if (rshift + bitwidth < 32) {
unsigned mask = (1 << bitwidth) - 1;
value = LLVMBuildAnd(ctx->ac.builder, value,
 LLVMConstInt(ctx->i32, mask, 0), "");
}
 
return value;
 }
 
+static LLVMValueRef unpack_param(struct si_shader_context *ctx,
+unsigned param, unsigned rshift,
+unsigned bitwidth)
+{
+   LLVMValueRef value = LLVMGetParam(ctx->main_fn, param);
+
+   return unpack_llvm_param(ctx, value, rshift, bitwidth);
+}
+
 static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
 {
switch (ctx->type) {
case PIPE_SHADER_TESS_CTRL:
return unpack_param(ctx, ctx->param_tcs_rel_ids, 0, 8);
 
case PIPE_SHADER_TESS_EVAL:
return LLVMGetParam(ctx->main_fn,
ctx->param_tes_rel_patch_id);
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 09/20] ac: add {tcs,tes}_patch_id to the abi

2017-12-12 Thread Timothy Arceri
Reviewed-by: Nicolai Hähnle 
---
 src/amd/common/ac_nir_to_llvm.c   | 22 +++---
 src/amd/common/ac_shader_abi.h|  2 ++
 src/gallium/drivers/radeonsi/si_shader.c  | 17 -
 src/gallium/drivers/radeonsi/si_shader_internal.h |  2 --
 4 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6060df75314..29c2bb26221 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -103,24 +103,22 @@ struct nir_to_llvm_context {
LLVMValueRef ls_out_layout;
LLVMValueRef es2gs_offset;
 
LLVMValueRef tcs_offchip_layout;
LLVMValueRef tcs_out_offsets;
LLVMValueRef tcs_out_layout;
LLVMValueRef tcs_in_layout;
LLVMValueRef oc_lds;
LLVMValueRef merged_wave_info;
LLVMValueRef tess_factor_offset;
-   LLVMValueRef tcs_patch_id;
LLVMValueRef tcs_rel_ids;
LLVMValueRef tes_rel_patch_id;
-   LLVMValueRef tes_patch_id;
LLVMValueRef tes_u;
LLVMValueRef tes_v;
 
LLVMValueRef gsvs_ring_stride;
LLVMValueRef gsvs_num_entries;
LLVMValueRef gs2vs_offset;
LLVMValueRef gs_wave_id;
LLVMValueRef gs_vtx_offset[6];
 
LLVMValueRef esgs_ring;
@@ -782,57 +780,57 @@ static void create_function(struct nir_to_llvm_context 
*ctx,
radv_define_vs_user_sgprs_phase1(ctx, stage, 
has_previous_stage, previous_stage, &args);
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->ls_out_layout); // ls out layout
 
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_offchip_layout); // tcs offchip layout
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_out_offsets); // tcs out offsets
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_out_layout); // tcs out layout
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_in_layout); // tcs in layout
if (ctx->shader_info->info.needs_multiview_view_index)
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->view_index);
 
-   add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_patch_id); // patch id
+   add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->abi.tcs_patch_id); // patch id
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_rel_ids); // rel ids;
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->abi.vertex_id); // vertex id
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->rel_auto_id); // rel auto id
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->vs_prim_id); // vs prim id
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->abi.instance_id); // instance id
} else {
radv_define_common_user_sgprs_phase1(ctx, stage, 
has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_offchip_layout); // tcs offchip layout
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_out_offsets); // tcs out offsets
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_out_layout); // tcs out layout
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_in_layout); // tcs in layout
if (ctx->shader_info->info.needs_multiview_view_index)
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->view_index);
add_sgpr_argument(&args, ctx->ac.i32, &ctx->oc_lds); // 
param oc lds
add_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tess_factor_offset); // tess factor offset
-   add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_patch_id); // patch id
+   add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->abi.tcs_patch_id); // patch id
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_rel_ids); // rel ids;
}
break;
case MESA_SHADER_TESS_EVAL:
radv_define_common_user_sgprs_phase1(ctx, stage, 
has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_offchip_layout); // tcs offchip layout
if (ctx->shader_info->info.needs_multiview_view_index || 
(!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index))
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->view_index);
if (ctx->options->key.tes.as_es) {
add_sgpr_argument(&args, ctx->ac.i32, &ctx->oc_lds); // 
OC LDS
add_sgpr_argument(

[Mesa-dev] [PATCH v2 11/20] ac/radeonsi: add tcs_rel_ids to the abi

2017-12-12 Thread Timothy Arceri
Reviewed-by: Nicolai Hähnle 
---
 src/amd/common/ac_nir_to_llvm.c   | 15 +++
 src/amd/common/ac_shader_abi.h|  1 +
 src/gallium/drivers/radeonsi/si_shader.c  | 19 ++-
 src/gallium/drivers/radeonsi/si_shader_internal.h |  1 -
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 29c2bb26221..df761782b64 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -103,21 +103,20 @@ struct nir_to_llvm_context {
LLVMValueRef ls_out_layout;
LLVMValueRef es2gs_offset;
 
LLVMValueRef tcs_offchip_layout;
LLVMValueRef tcs_out_offsets;
LLVMValueRef tcs_out_layout;
LLVMValueRef tcs_in_layout;
LLVMValueRef oc_lds;
LLVMValueRef merged_wave_info;
LLVMValueRef tess_factor_offset;
-   LLVMValueRef tcs_rel_ids;
LLVMValueRef tes_rel_patch_id;
LLVMValueRef tes_u;
LLVMValueRef tes_v;
 
LLVMValueRef gsvs_ring_stride;
LLVMValueRef gsvs_num_entries;
LLVMValueRef gs2vs_offset;
LLVMValueRef gs_wave_id;
LLVMValueRef gs_vtx_offset[6];
 
@@ -412,21 +411,21 @@ static LLVMValueRef unpack_param(struct ac_llvm_context 
*ctx,
value = LLVMBuildAnd(ctx->builder, value,
 LLVMConstInt(ctx->i32, mask, false), "");
}
return value;
 }
 
 static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx)
 {
switch (ctx->stage) {
case MESA_SHADER_TESS_CTRL:
-   return unpack_param(&ctx->ac, ctx->tcs_rel_ids, 0, 8);
+   return unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8);
case MESA_SHADER_TESS_EVAL:
return ctx->tes_rel_patch_id;
break;
default:
unreachable("Illegal stage");
}
 }
 
 /* Tessellation shaders pass outputs to the next shader using LDS.
  *
@@ -781,37 +780,37 @@ static void create_function(struct nir_to_llvm_context 
*ctx,
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->ls_out_layout); // ls out layout
 
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_offchip_layout); // tcs offchip layout
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_out_offsets); // tcs out offsets
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_out_layout); // tcs out layout
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_in_layout); // tcs in layout
if (ctx->shader_info->info.needs_multiview_view_index)
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->view_index);
 
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->abi.tcs_patch_id); // patch id
-   add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_rel_ids); // rel ids;
+   add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->abi.tcs_rel_ids); // rel ids;
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->abi.vertex_id); // vertex id
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->rel_auto_id); // rel auto id
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->vs_prim_id); // vs prim id
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->abi.instance_id); // instance id
} else {
radv_define_common_user_sgprs_phase1(ctx, stage, 
has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_offchip_layout); // tcs offchip layout
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_out_offsets); // tcs out offsets
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_out_layout); // tcs out layout
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_in_layout); // tcs in layout
if (ctx->shader_info->info.needs_multiview_view_index)
add_user_sgpr_argument(&args, ctx->ac.i32, 
&ctx->view_index);
add_sgpr_argument(&args, ctx->ac.i32, &ctx->oc_lds); // 
param oc lds
add_sgpr_argument(&args, ctx->ac.i32, 
&ctx->tess_factor_offset); // tess factor offset
add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->abi.tcs_patch_id); // patch id
-   add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->tcs_rel_ids); // rel ids;
+   add_vgpr_argument(&args, ctx->ac.i32, 
&ctx->abi.tcs_rel_ids); // rel ids;
}
break;
case MESA_SHADER_TESS_EVAL:
radv_define_common_user_sgprs_phase1(ctx, stage, 
has_previous_stage,

[Mesa-dev] [PATCH v2 08/20] radeonsi: add nir support for tcs outputs

2017-12-12 Thread Timothy Arceri
Reviewed-by: Nicolai Hähnle 
---
 src/gallium/drivers/radeonsi/si_shader.c | 124 +++
 1 file changed, 124 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index c5ca699ac00..a8f425b50ee 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1426,20 +1426,143 @@ static void store_output_tcs(struct 
lp_build_tgsi_context *bld_base,
}
 
if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
LLVMValueRef value = lp_build_gather_values(&ctx->gallivm,
values, 4);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, 
buf_addr,
base, 0, 1, 0, true, false);
}
 }
 
+static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
+   LLVMValueRef vertex_index,
+   LLVMValueRef param_index,
+   unsigned const_index,
+   unsigned location,
+   unsigned driver_location,
+   LLVMValueRef src,
+   unsigned component,
+   bool is_patch,
+   bool is_compact,
+   unsigned writemask)
+{
+   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+   struct tgsi_shader_info *info = &ctx->shader->selector->info;
+   LLVMValueRef dw_addr, stride;
+   LLVMValueRef buffer, base, addr;
+   LLVMValueRef values[4];
+   bool skip_lds_store;
+   bool is_tess_factor = false, is_tess_inner = false;
+
+   driver_location = driver_location / 4;
+
+   if (param_index) {
+   /* Add the constant index to the indirect index */
+   param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
+  LLVMConstInt(ctx->i32, const_index, 
0), "");
+   } else {
+   if (const_index != 0)
+   param_index = LLVMConstInt(ctx->i32, const_index, 0);
+   }
+
+   if (!is_patch) {
+   stride = get_tcs_out_vertex_dw_stride(ctx);
+   dw_addr = get_tcs_out_current_patch_offset(ctx);
+   dw_addr = get_dw_address_from_generic_indices(ctx, stride, 
dw_addr,
+ vertex_index, 
param_index,
+ driver_location,
+ 
info->output_semantic_name,
+ 
info->output_semantic_index,
+ is_patch);
+
+   skip_lds_store = !info->reads_pervertex_outputs;
+   } else {
+   dw_addr = get_tcs_out_current_patch_data_offset(ctx);
+   dw_addr = get_dw_address_from_generic_indices(ctx, NULL, 
dw_addr,
+ vertex_index, 
param_index,
+ driver_location,
+ 
info->output_semantic_name,
+ 
info->output_semantic_index,
+ is_patch);
+
+   skip_lds_store = !info->reads_perpatch_outputs;
+
+   if (!param_index) {
+   int name = info->output_semantic_name[driver_location];
+
+   /* Always write tess factors into LDS for the TCS 
epilog. */
+   if (name == TGSI_SEMANTIC_TESSINNER ||
+   name == TGSI_SEMANTIC_TESSOUTER) {
+   /* The epilog doesn't read LDS if invocation 0 
defines tess factors. */
+   skip_lds_store = 
!info->reads_tessfactor_outputs &&
+
ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs;
+   is_tess_factor = true;
+   is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
+   }
+   }
+   }
+
+   buffer = desc_from_addr_base64k(ctx, 
ctx->param_tcs_offchip_addr_base64k);
+
+   base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
+
+   addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, 
vertex_index,
+  param_index, 
driver_location,
+  
info->output_semantic_name,
+   

[Mesa-dev] [PATCH v2 03/20] ac: call load_tcs_input() via the abi

2017-12-12 Thread Timothy Arceri
This also enables some code sharing with tes.

V2: drop type param and just use ctx->i32
---
 src/amd/common/ac_nir_to_llvm.c | 36 +---
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index bad3badfc94..fd2151fcf00 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2700,49 +2700,47 @@ get_dw_address(struct nir_to_llvm_context *ctx,
dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
   LLVMConstInt(ctx->ac.i32, param * 4, false), "");
 
if (const_index && compact_const_index)
dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
   LLVMConstInt(ctx->ac.i32, const_index, 
false), "");
return dw_addr;
 }
 
 static LLVMValueRef
-load_tcs_input(struct nir_to_llvm_context *ctx,
-  nir_intrinsic_instr *instr)
+load_tcs_input(struct ac_shader_abi *abi,
+  LLVMValueRef vertex_index,
+  LLVMValueRef indir_index,
+  unsigned const_index,
+  unsigned location,
+  unsigned driver_location,
+  unsigned component,
+  unsigned num_components,
+  bool is_patch,
+  bool is_compact)
 {
+   struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
LLVMValueRef dw_addr, stride;
-   unsigned const_index;
-   LLVMValueRef vertex_index;
-   LLVMValueRef indir_index;
-   unsigned param;
LLVMValueRef value[4], result;
-   const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, 
ctx->stage);
-   const bool is_compact = instr->variables[0]->var->data.compact;
-   param = 
shader_io_get_unique_index(instr->variables[0]->var->data.location);
-   get_deref_offset(ctx->nir, instr->variables[0],
-false, NULL, per_vertex ? &vertex_index : NULL,
-&const_index, &indir_index);
+   unsigned param = shader_io_get_unique_index(location);
 
stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
dw_addr = get_tcs_in_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, 
vertex_index, stride,
 indir_index);
 
-   unsigned comp = instr->variables[0]->var->data.location_frac;
-   for (unsigned i = 0; i < instr->num_components + comp; i++) {
+   for (unsigned i = 0; i < num_components + component; i++) {
value[i] = ac_lds_load(&ctx->ac, dw_addr);
dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
   ctx->ac.i32_1, "");
}
-   result = ac_build_varying_gather_values(&ctx->ac, value, 
instr->num_components, comp);
-   result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, 
&instr->dest.ssa), "");
+   result = ac_build_varying_gather_values(&ctx->ac, value, 
num_components, component);
return result;
 }
 
 static LLVMValueRef
 load_tcs_output(struct nir_to_llvm_context *ctx,
   nir_intrinsic_instr *instr)
 {
LLVMValueRef dw_addr;
LLVMValueRef stride = NULL;
LLVMValueRef value[4], result;
@@ -2995,23 +2993,22 @@ static LLVMValueRef visit_load_var(struct 
ac_nir_context *ctx,
bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
 instr->variables[0]->var->data.mode == nir_var_shader_in;
get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
  &const_index, &indir_index);
 
if (instr->dest.ssa.bit_size == 64)
ve *= 2;
 
switch (instr->variables[0]->var->data.mode) {
case nir_var_shader_in:
-   if (ctx->stage == MESA_SHADER_TESS_CTRL)
-   return load_tcs_input(ctx->nctx, instr);
-   if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+   if (ctx->stage == MESA_SHADER_TESS_CTRL ||
+   ctx->stage == MESA_SHADER_TESS_EVAL) {
LLVMValueRef result;
LLVMValueRef vertex_index = NULL;
LLVMValueRef indir_index = NULL;
unsigned const_index = 0;
unsigned location = 
instr->variables[0]->var->data.location;
unsigned driver_location = 
instr->variables[0]->var->data.driver_location;
const bool is_patch =  
instr->variables[0]->var->data.patch;
const bool is_compact = 
instr->variables[0]->var->data.compact;
 
get_deref_offset(ctx, instr->variables[0],
@@ -6599,20 +6596,21 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.num_output_clips = 
shaders[i]->info.clip_distance_array_size;
ctx.num_output

[Mesa-dev] [PATCH v2 01/20] radeonsi: add si_nir_load_input_tes()

2017-12-12 Thread Timothy Arceri
V2: drop type param and just use ctx->i32
---
 src/gallium/drivers/radeonsi/si_shader.c  | 48 +++
 src/gallium/drivers/radeonsi/si_shader_internal.h | 11 ++
 2 files changed, 59 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 00770547499..d8e00da66c0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1221,20 +1221,68 @@ static LLVMValueRef fetch_input_tes(
 
buffer = desc_from_addr_base64k(ctx, 
ctx->param_tcs_offchip_addr_base64k);
 
base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
 
return buffer_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle,
   buffer, base, addr, true);
 }
 
+LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
+  LLVMValueRef vertex_index,
+  LLVMValueRef param_index,
+  unsigned const_index,
+  unsigned location,
+  unsigned driver_location,
+  unsigned component,
+  unsigned num_components,
+  bool is_patch,
+  bool is_compact)
+{
+   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+   struct tgsi_shader_info *info = &ctx->shader->selector->info;
+   LLVMValueRef buffer, base, addr;
+
+   driver_location = driver_location / 4;
+
+   buffer = desc_from_addr_base64k(ctx, 
ctx->param_tcs_offchip_addr_base64k);
+
+   base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
+
+   if (param_index) {
+   /* Add the constant index to the indirect index */
+   param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
+  LLVMConstInt(ctx->i32, const_index, 
0), "");
+   } else {
+   param_index = LLVMConstInt(ctx->i32, const_index, 0);
+   }
+
+   addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, 
vertex_index,
+  param_index, 
driver_location,
+  
info->input_semantic_name,
+  
info->input_semantic_index,
+  is_patch);
+
+   /* TODO: This will generate rather ordinary llvm code, although it
+* should be easy for the optimiser to fix up. In future we might want
+* to refactor buffer_load(), but for now this maximises code sharing
+* between the NIR and TGSI backends.
+*/
+   LLVMValueRef value[4];
+   for (unsigned i = component; i < num_components + component; i++) {
+   value[i] = buffer_load(&ctx->bld_base, ctx->i32, i, buffer, 
base, addr, true);
+   }
+
+   return ac_build_varying_gather_values(&ctx->ac, value, num_components, 
component);
+}
+
 static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
 const struct tgsi_full_instruction *inst,
 const struct tgsi_opcode_info *info,
 unsigned index,
 LLVMValueRef dst[4])
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
unsigned chan_index;
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index f50a022db85..3fbfea7752b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -270,20 +270,31 @@ void si_llvm_optimize_module(struct si_shader_context 
*ctx);
 LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
  LLVMTypeRef type,
  LLVMValueRef ptr,
  LLVMValueRef ptr2);
 
 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle);
 
+LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
+  LLVMValueRef vertex_index,
+  LLVMValueRef param_index,
+  unsigned const_index,
+  unsigned location,
+  un

[Mesa-dev] [PATCH v2 04/20] radeonsi: add get_dw_address_from_generic_indices() helper

2017-12-12 Thread Timothy Arceri
This will be used by both the tgsi and nir backends.

Reviewed-by: Nicolai Hähnle 
---
 src/gallium/drivers/radeonsi/si_shader.c | 76 +++-
 1 file changed, 46 insertions(+), 30 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 9e16b92a5a1..5ef95589c2a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -819,108 +819,124 @@ LLVMValueRef si_get_indirect_index(struct 
si_shader_context *ctx,
  */
 LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
   const struct tgsi_ind_register *ind,
   int rel_index, unsigned num)
 {
LLVMValueRef result = si_get_indirect_index(ctx, ind, 1, rel_index);
 
return si_llvm_bound_index(ctx, result, num);
 }
 
+static LLVMValueRef get_dw_address_from_generic_indices(struct 
si_shader_context *ctx,
+   LLVMValueRef 
vertex_dw_stride,
+   LLVMValueRef base_addr,
+   LLVMValueRef 
vertex_index,
+   LLVMValueRef 
param_index,
+   unsigned input_index,
+   ubyte *name,
+   ubyte *index,
+   bool is_patch)
+{
+   if (vertex_dw_stride) {
+   base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
+LLVMBuildMul(ctx->ac.builder, 
vertex_index,
+ vertex_dw_stride, ""), 
"");
+   }
+
+   if (param_index) {
+   base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
+LLVMBuildMul(ctx->ac.builder, 
param_index,
+ LLVMConstInt(ctx->i32, 4, 
0), ""), "");
+   }
+
+   int param = is_patch ?
+   si_shader_io_get_unique_index_patch(name[input_index],
+   index[input_index]) :
+   si_shader_io_get_unique_index(name[input_index],
+ index[input_index]);
+
+   /* Add the base address of the element. */
+   return LLVMBuildAdd(ctx->ac.builder, base_addr,
+   LLVMConstInt(ctx->i32, param * 4, 0), "");
+}
 
 /**
  * Calculate a dword address given an input or output register and a stride.
  */
 static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
   const struct tgsi_full_dst_register *dst,
   const struct tgsi_full_src_register *src,
   LLVMValueRef vertex_dw_stride,
   LLVMValueRef base_addr)
 {
struct tgsi_shader_info *info = &ctx->shader->selector->info;
ubyte *name, *index, *array_first;
-   int first, param;
+   int input_index;
struct tgsi_full_dst_register reg;
+   LLVMValueRef vertex_index = NULL;
+   LLVMValueRef ind_index = NULL;
 
/* Set the register description. The address computation is the same
 * for sources and destinations. */
if (src) {
reg.Register.File = src->Register.File;
reg.Register.Index = src->Register.Index;
reg.Register.Indirect = src->Register.Indirect;
reg.Register.Dimension = src->Register.Dimension;
reg.Indirect = src->Indirect;
reg.Dimension = src->Dimension;
reg.DimIndirect = src->DimIndirect;
} else
reg = *dst;
 
/* If the register is 2-dimensional (e.g. an array of vertices
 * in a primitive), calculate the base address of the vertex. */
if (reg.Register.Dimension) {
-   LLVMValueRef index;
-
if (reg.Dimension.Indirect)
-   index = si_get_indirect_index(ctx, &reg.DimIndirect,
+   vertex_index = si_get_indirect_index(ctx, 
&reg.DimIndirect,
  1, reg.Dimension.Index);
else
-   index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
-
-   base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
-LLVMBuildMul(ctx->ac.builder, index,
- vertex_dw_stride, ""), 
"");
+   vertex_index = LLVMConstInt(ctx->i32, 
reg.Dimension.Index, 0);
}
 
/* Get information about the register. */
if (reg.Register

[Mesa-dev] [PATCH v2 02/20] ac: add load_tes_inputs() to the abi

2017-12-12 Thread Timothy Arceri
V2: drop type param and just use ctx->i32
---
 src/amd/common/ac_nir_to_llvm.c  | 62 
 src/amd/common/ac_shader_abi.h   | 11 ++
 src/gallium/drivers/radeonsi/si_shader.c |  1 +
 3 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index f3602a267de..bad3badfc94 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2852,53 +2852,50 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
}
 
if (writemask == 0xF) {
ac_build_buffer_store_dword(&ctx->ac, 
ctx->hs_ring_tess_offchip, src, 4,
buf_addr, ctx->oc_lds,
(base * 4), 1, 0, true, false);
}
 }
 
 static LLVMValueRef
-load_tes_input(struct nir_to_llvm_context *ctx,
-  const nir_intrinsic_instr *instr)
+load_tes_input(struct ac_shader_abi *abi,
+  LLVMValueRef vertex_index,
+  LLVMValueRef param_index,
+  unsigned const_index,
+  unsigned location,
+  unsigned driver_location,
+  unsigned component,
+  unsigned num_components,
+  bool is_patch,
+  bool is_compact)
 {
+   struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
LLVMValueRef buf_addr;
LLVMValueRef result;
-   LLVMValueRef vertex_index = NULL;
-   LLVMValueRef indir_index = NULL;
-   unsigned const_index = 0;
-   unsigned param;
-   const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, 
ctx->stage);
-   const bool is_compact = instr->variables[0]->var->data.compact;
+   unsigned param = shader_io_get_unique_index(location);
 
-   get_deref_offset(ctx->nir, instr->variables[0],
-false, NULL, per_vertex ? &vertex_index : NULL,
-&const_index, &indir_index);
-   param = 
shader_io_get_unique_index(instr->variables[0]->var->data.location);
-   if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 
&&
-   is_compact && const_index > 3) {
+   if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 
3) {
const_index -= 3;
param++;
}
 
-   unsigned comp = instr->variables[0]->var->data.location_frac;
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
-is_compact, vertex_index, 
indir_index);
+is_compact, vertex_index, 
param_index);
 
-   LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, comp * 4, false);
+   LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, 
false);
buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
 
-   result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, 
instr->num_components, NULL,
+   result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, 
num_components, NULL,
  buf_addr, ctx->oc_lds, is_compact ? (4 * 
const_index) : 0, 1, 0, true, false);
-   result = trim_vector(&ctx->ac, result, instr->num_components);
-   result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, 
&instr->dest.ssa), "");
+   result = trim_vector(&ctx->ac, result, num_components);
return result;
 }
 
 static LLVMValueRef
 load_gs_input(struct ac_shader_abi *abi,
  unsigned location,
  unsigned driver_location,
  unsigned component,
  unsigned num_components,
  unsigned vertex_index,
@@ -3000,22 +2997,42 @@ static LLVMValueRef visit_load_var(struct 
ac_nir_context *ctx,
get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
  &const_index, &indir_index);
 
if (instr->dest.ssa.bit_size == 64)
ve *= 2;
 
switch (instr->variables[0]->var->data.mode) {
case nir_var_shader_in:
if (ctx->stage == MESA_SHADER_TESS_CTRL)
return load_tcs_input(ctx->nctx, instr);
-   if (ctx->stage == MESA_SHADER_TESS_EVAL)
-   return load_tes_input(ctx->nctx, instr);
+   if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+   LLVMValueRef result;
+   LLVMValueRef vertex_index = NULL;
+   LLVMValueRef indir_index = NULL;
+   unsigned const_index = 0;
+   unsigned location = 
instr->variables[0]->var->data.location;
+   unsigned driver_location = 
instr->variables[0]->var->data.driver_location;
+   const bool is_patch =  
instr->variables[0]->var->data.patch;
+ 

[Mesa-dev] [PATCH v2 06/20] ac: add store_tcs_outputs() to the abi

2017-12-12 Thread Timothy Arceri
Reviewed-by: Nicolai Hähnle 
---
 src/amd/common/ac_nir_to_llvm.c | 63 +
 src/amd/common/ac_shader_abi.h  | 12 
 2 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index fd2151fcf00..6f84604d54a 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2770,79 +2770,78 @@ load_tcs_output(struct nir_to_llvm_context *ctx,
value[i] = ac_lds_load(&ctx->ac, dw_addr);
dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
   ctx->ac.i32_1, "");
}
result = ac_build_varying_gather_values(&ctx->ac, value, 
instr->num_components, comp);
result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, 
&instr->dest.ssa), "");
return result;
 }
 
 static void
-store_tcs_output(struct nir_to_llvm_context *ctx,
-nir_intrinsic_instr *instr,
+store_tcs_output(struct ac_shader_abi *abi,
+LLVMValueRef vertex_index,
+LLVMValueRef param_index,
+unsigned const_index,
+unsigned location,
+unsigned driver_location,
 LLVMValueRef src,
+unsigned component,
+bool is_patch,
+bool is_compact,
 unsigned writemask)
 {
+   struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
LLVMValueRef dw_addr;
LLVMValueRef stride = NULL;
LLVMValueRef buf_addr = NULL;
-   LLVMValueRef vertex_index = NULL;
-   LLVMValueRef indir_index = NULL;
-   unsigned const_index = 0;
unsigned param;
-   const unsigned comp = instr->variables[0]->var->data.location_frac;
-   const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, 
ctx->stage);
-   const bool is_compact = instr->variables[0]->var->data.compact;
bool store_lds = true;
 
-   if (instr->variables[0]->var->data.patch) {
-   if (!(ctx->tcs_patch_outputs_read & (1U << 
instr->variables[0]->var->data.location)))
+   if (is_patch) {
+   if (!(ctx->tcs_patch_outputs_read & (1U << location)))
store_lds = false;
} else {
-   if (!(ctx->tcs_outputs_read & (1ULL << 
instr->variables[0]->var->data.location)))
+   if (!(ctx->tcs_outputs_read & (1ULL << location)))
store_lds = false;
}
-   get_deref_offset(ctx->nir, instr->variables[0],
-false, NULL, per_vertex ? &vertex_index : NULL,
-&const_index, &indir_index);
 
-   param = 
shader_io_get_unique_index(instr->variables[0]->var->data.location);
-   if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 
&&
+   param = shader_io_get_unique_index(location);
+   if (location == VARYING_SLOT_CLIP_DIST0 &&
is_compact && const_index > 3) {
const_index -= 3;
param++;
}
 
-   if (!instr->variables[0]->var->data.patch) {
+   if (!is_patch) {
stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
dw_addr = get_tcs_out_current_patch_offset(ctx);
} else {
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
}
 
-   mark_tess_output(ctx, instr->variables[0]->var->data.patch, param);
+   mark_tess_output(ctx, is_patch, param);
 
dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, 
vertex_index, stride,
-indir_index);
+param_index);
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, 
is_compact,
-vertex_index, indir_index);
+vertex_index, param_index);
 
bool is_tess_factor = false;
-   if (instr->variables[0]->var->data.location == 
VARYING_SLOT_TESS_LEVEL_INNER ||
-   instr->variables[0]->var->data.location == 
VARYING_SLOT_TESS_LEVEL_OUTER)
+   if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
+   location == VARYING_SLOT_TESS_LEVEL_OUTER)
is_tess_factor = true;
 
unsigned base = is_compact ? const_index : 0;
for (unsigned chan = 0; chan < 8; chan++) {
if (!(writemask & (1 << chan)))
continue;
-   LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - 
comp);
+   LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - 
component);
 
if (store_lds || is_tess_factor)
ac_lds_store(&ctx->ac, dw_addr, value);
 
if (!is_tess_factor && writemask != 0xF)
ac_build_buffer_store_dword(&ctx->ac, 
ctx

[Mesa-dev] [PATCH v2 05/20] radeonsi: add si_nir_load_input_tcs()

2017-12-12 Thread Timothy Arceri
V2: drop type param and just use ctx->i32
---
 src/gallium/drivers/radeonsi/si_shader.c | 45 
 1 file changed, 45 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 5ef95589c2a..c5ca699ac00 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1200,20 +1200,64 @@ static LLVMValueRef fetch_input_tcs(
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef dw_addr, stride;
 
stride = get_tcs_in_vertex_dw_stride(ctx);
dw_addr = get_tcs_in_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
 
return lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, 
dw_addr);
 }
 
+static LLVMValueRef si_nir_load_input_tcs(struct ac_shader_abi *abi,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned const_index,
+ unsigned location,
+ unsigned driver_location,
+ unsigned component,
+ unsigned num_components,
+ bool is_patch,
+ bool is_compact)
+{
+   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+   struct tgsi_shader_info *info = &ctx->shader->selector->info;
+   struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
+   LLVMValueRef dw_addr, stride;
+
+   driver_location = driver_location / 4;
+
+   stride = get_tcs_in_vertex_dw_stride(ctx);
+   dw_addr = get_tcs_in_current_patch_offset(ctx);
+
+   if (param_index) {
+   /* Add the constant index to the indirect index */
+   param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
+  LLVMConstInt(ctx->i32, const_index, 
0), "");
+   } else {
+   param_index = LLVMConstInt(ctx->i32, const_index, 0);
+   }
+
+   dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
+ vertex_index, param_index,
+ driver_location,
+ info->input_semantic_name,
+ 
info->input_semantic_index,
+ is_patch);
+
+   LLVMValueRef value[4];
+   for (unsigned i = 0; i < num_components + component; i++) {
+   value[i] = lds_load(bld_base, ctx->i32, i, dw_addr);
+   }
+
+   return ac_build_varying_gather_values(&ctx->ac, value, num_components, 
component);
+}
+
 static LLVMValueRef fetch_output_tcs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type, unsigned swizzle)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef dw_addr, stride;
 
if (reg->Register.Dimension) {
stride = get_tcs_out_vertex_dw_stride(ctx);
@@ -5865,20 +5909,21 @@ static bool si_compile_tgsi_main(struct 
si_shader_context *ctx,
if (shader->key.as_ls)
ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue;
else if (shader->key.as_es)
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
else
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_TESS_CTRL:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
+   ctx->abi.load_tess_inputs = si_nir_load_input_tcs;
bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
bld_base->emit_store = store_output_tcs;
bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
break;
case PIPE_SHADER_TESS_EVAL:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
ctx->abi.load_tess_inputs = si_nir_load_input_tes;
if (shader->key.as_es)
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
else
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 07/20] ac: move some helpers to ac_llvm_build.c

2017-12-12 Thread Timothy Arceri
We will call these from the radeonsi NIR backend.

Reviewed-by: Nicolai Hähnle 
---
 src/amd/common/ac_llvm_build.c  | 24 +
 src/amd/common/ac_llvm_build.h  |  8 ++
 src/amd/common/ac_nir_to_llvm.c | 58 +
 3 files changed, 50 insertions(+), 40 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index b2bf1bf7b51..faa08b6301c 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -91,20 +91,44 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
LLVMContextRef context,
 
args[0] = LLVMConstReal(ctx->f32, 2.5);
ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
 
ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
"amdgpu.uniform", 14);
 
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
 }
 
+int
+ac_get_llvm_num_components(LLVMValueRef value)
+{
+   LLVMTypeRef type = LLVMTypeOf(value);
+   unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
+ ? LLVMGetVectorSize(type)
+ : 1;
+   return num_components;
+}
+
+LLVMValueRef
+ac_llvm_extract_elem(struct ac_llvm_context *ac,
+LLVMValueRef value,
+int index)
+{
+   int count = ac_get_llvm_num_components(value);
+
+   if (count == 1)
+   return value;
+
+   return LLVMBuildExtractElement(ac->builder, value,
+  LLVMConstInt(ac->i32, index, false), "");
+}
+
 unsigned
 ac_get_type_size(LLVMTypeRef type)
 {
LLVMTypeKind kind = LLVMGetTypeKind(type);
 
switch (kind) {
case LLVMIntegerTypeKind:
return LLVMGetIntTypeWidth(type) / 8;
case LLVMFloatTypeKind:
return 4;
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 655dc1dcc86..c14b0d9f019 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -75,20 +75,28 @@ struct ac_llvm_context {
 
enum chip_class chip_class;
 
LLVMValueRef lds;
 };
 
 void
 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
 enum chip_class chip_class);
 
+int
+ac_get_llvm_num_components(LLVMValueRef value);
+
+LLVMValueRef
+ac_llvm_extract_elem(struct ac_llvm_context *ac,
+LLVMValueRef value,
+int index);
+
 unsigned ac_get_type_size(LLVMTypeRef type);
 
 LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
 LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
 LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
 LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
 
 LLVMValueRef
 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
   LLVMTypeRef return_type, LLVMValueRef *params,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6f84604d54a..6060df75314 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -995,46 +995,24 @@ static void create_function(struct nir_to_llvm_context 
*ctx,
set_userdata_location_shader(ctx, 
AC_UD_PS_SAMPLE_POS_OFFSET, &user_sgpr_idx, 1);
}
break;
default:
unreachable("Shader stage not implemented");
}
 
ctx->shader_info->num_user_sgprs = user_sgpr_idx;
 }
 
-static int get_llvm_num_components(LLVMValueRef value)
-{
-   LLVMTypeRef type = LLVMTypeOf(value);
-   unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
- ? LLVMGetVectorSize(type)
- : 1;
-   return num_components;
-}
-
-static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac,
- LLVMValueRef value,
- int index)
-{
-   int count = get_llvm_num_components(value);
-
-   if (count == 1)
-   return value;
-
-   return LLVMBuildExtractElement(ac->builder, value,
-  LLVMConstInt(ac->i32, index, false), "");
-}
-
 static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
 LLVMValueRef value, unsigned count)
 {
-   unsigned num_components = get_llvm_num_components(value);
+   unsigned num_components = ac_get_llvm_num_components(value);
if (count == num_components)
return value;
 
LLVMValueRef masks[] = {
LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
 
if (count == 1)
return LLVM

Re: [Mesa-dev] [PATCH 3/3] i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for each bo used in batchbuffer is correct

2017-12-12 Thread Rogovin, Kevin
Hi,

 Just got confirmation that the kernel does the syncing required to make sure that 
pread values are reliable.

-Kevin

-Original Message-
From: Rogovin, Kevin 
Sent: Wednesday, December 13, 2017 8:19 AM
To: 'Jason Ekstrand' 
Cc: mesa-dev@lists.freedesktop.org; Lahtinen, Joonas 
Subject: RE: [Mesa-dev] [PATCH 3/3] i965: if DEBUG_OUT_OF_BOUND_CHK is up, 
check that noise padding for each bo used in batchbuffer is correct

Hi,

> I think you want to do this at the end of submit_batch instead and add 
> a brw_bo_wait_rendering on the batch. Otherwise, your bounds checking is 
> racing with the GPU.

I remember being told that pread has the kernel do the required waiting, 
however I am not 100% sure of this (which is why I cc'd Joonas to either 
confirm or deny the assertion). Joonas?
 
-Kevin

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for each bo used in batchbuffer is correct

2017-12-12 Thread Rogovin, Kevin
Hi,

> I think you want to do this at the end of submit_batch instead and add a 
> brw_bo_wait_rendering on the batch.  
> Otherwise, your bounds checking is racing with the GPU.

I remember being told that pread has the kernel do the required waiting, 
however I am not 100% sure of this (which is why I cc'd Joonas to either 
confirm or deny the assertion). Joonas?
 
-Kevin

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965: add noise padding to buffer object and function to check if noise is correct

2017-12-12 Thread Rogovin, Kevin
Hi,

 Thank you for reading the code and giving advice to improve upon it. Below are 
some thoughts:

> I can't help but think that this could be a bit simpler and involve throwing 
> fewer pointers around.

I was thinking this too; the easiest way to do this is to just have the same 
noise for all the paddings;
that would mean there is just one pointer of data and that would be a private 
member of brw_bufmgr.

> This is 4096.  I think we could just have a single uint32_t padding field 
> which is either 0 or 4096 (More on that later).

If the kernel supports huge pages, though now there might be "two" different 
things as far as page size goes then:
the page size for CPU things and the page size for the PPGTT. I don't know if 
they must be the same or if they can
be different. I also do not know how to actually get the page size used by the 
PPGTT, as getpagesize() is the page
size for the CPU page tabling magic. Any suggestions on how to get the page 
size used for the PPGTT? I think it is
worthwhile to make sure that at least some of the noise is in the next page, but 
I admit that is just a hunch.

> Does using rand() really help us?  Why not just come up with some hash-like 
> thing which generates consistent pseudo-random data? 
> How about something like "value[i] = i * 853 + 193"  (some random primes)?  
> That would mean that we can generate the data and check
> it without having to store it in a per-bo temporary.  If you want it to be 
> magic per-bo, you could also seed it somehow with the bo handle
> (just add handle * 607).

I figured that rand() was the most reliable way to generate noise in addition 
to the least amount of code. However, if the padding values
are generated with an internal routine (maybe something like value[i] = 223 * 
value[i - 1] + 123, with value[0] =  handle; all truncated to 8-bits),
that would drop the need completely for the per-buffer storage.


> If we always allocate 4096B of padding, then you don't need to heap allocate 
> it and can just put it on the stack for the purpose of interacting with
 > pread/pwrite.  It's a bit big but still perfectly reasonable.  If a day came 
 > when we wanted to make the padding size adjustable, it would still 
> probably be reasonable to make the heap allocations temporary so we have less 
> random stuff in the BO we have to cleanup.

I was tempted to make it stack allocated, but the 4096 size scared me off... 
and when I thought of huge page support of 2M I ran screaming.
 
> There's a part of me that wants to kill pread/write.  However, I think you 
> may have come up with the only good use of it I've ever seen. :-)

I was told that one of the advantages of pread is the kernel will then do the 
syncing magic for you, i.e. waiting for the GPU to be done with the
buffer;  I freely admit that now I am not 100% sure of this.
 
> If we still keep these heap allocations, deleting them should be keyed off of 
> bo->padding.size or nothing at all.

That is how I originally wrote it, but to handle the case where creating a 
brw_bo fails midway (i.e. after GEM create ioctl, but during the tmp buffer 
allocation), checking the existence by .size > 0 was not going to work. 
However, for everywhere else it is fine.
 
At this point I am tempted to do the following for the noise padding:
  1. take your suggestion and make the noise per brw_bo, but the noise is 
generated with an incremental chaotic function that uses GEM-handle value as a 
start
  2. have a single integer in the brw_bo struct indicating the amount of noise 
padding it has;
  3. for checking use that single integer to heap-allocate a temporary buffer 
to store the pread contents OR have the necessary syncing operations and use 
the mapping pointer to read the values. The latter is more tempting I admit 
though.

-Kevin
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] program: Don't reset SamplersValidated when restoring from shader cache

2017-12-12 Thread Timothy Arceri

Reviewed-by: Timothy Arceri 

On 13/12/17 09:20, Jordan Justen wrote:

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103988
Signed-off-by: Jordan Justen 
---
  src/mesa/program/ir_to_mesa.cpp | 16 +---
  1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 5f663b3d09f..051ed2225b7 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -3115,15 +3115,17 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
link_shaders(ctx, prog);
 }
  
-   if (prog->data->LinkStatus) {

-  /* Reset sampler validated to true, validation happens via the
-   * LinkShader call below.
-   */
+   /* If LinkStatus is linking_success, then reset sampler validated to true,
+* validation happens via the LinkShader call below. If LinkStatus is
+* linking_skipped, then SamplersValidated will have been restored from the
+* shader cache.
+*/
+   if (prog->data->LinkStatus == linking_success) {
prog->SamplersValidated = GL_TRUE;
+   }
  
-  if (!ctx->Driver.LinkShader(ctx, prog)) {

- prog->data->LinkStatus = linking_failure;
-  }
+   if (prog->data->LinkStatus && !ctx->Driver.LinkShader(ctx, prog)) {
+  prog->data->LinkStatus = linking_failure;
 }
  
 /* Return early if we are loading the shader from on-disk cache */



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] meson: build clover

2017-12-12 Thread Aaron Watry
On Tue, 2017-12-12 at 12:08 -0800, Francisco Jerez wrote:
> Dylan Baker  writes:
> 
> > This has only been compile tested.
> > 
> > cc: Curro Jerez 
> > Signed-off-by: Dylan Baker 
> > ---
> >  include/meson.build   |  19 
> >  meson.build   |  27 +-
> >  meson_options.txt |  12 +++
> >  src/gallium/meson.build   |  12 ++-
> >  src/gallium/state_trackers/clover/meson.build | 122
> > ++
> >  src/gallium/targets/opencl/meson.build|  73
> > +++
> >  src/gallium/targets/pipe-loader/meson.build   |  76
> > 
> >  7 files changed, 336 insertions(+), 5 deletions(-)
> >  create mode 100644 src/gallium/state_trackers/clover/meson.build
> >  create mode 100644 src/gallium/targets/opencl/meson.build
> >  create mode 100644 src/gallium/targets/pipe-loader/meson.build
> > 
> > diff --git a/include/meson.build b/include/meson.build
> > index e4dae91cede..a2e7ce6580e 100644
> > --- a/include/meson.build
> > +++ b/include/meson.build
> > @@ -78,3 +78,22 @@ if with_gallium_st_nine
> >  subdir : 'd3dadapter',
> >)
> >  endif
> > +
> > +# Only install the headers if we are building a stand alone
> > implementation and
> > +# not an ICD enabled implementation
> > +if with_gallium_opencl and not with_opencl_icd
> > +  install_headers(
> > +'CL/cl.h',
> > +'CL/cl.hpp',
> > +'CL/cl_d3d10.h',
> > +'CL/cl_d3d11.h',
> > +'CL/cl_dx9_media_sharing.h',
> > +'CL/cl_egl.h',
> > +'CL/cl_ext.h',
> > +'CL/cl_gl.h',
> > +'CL/cl_gl_ext.h',
> > +'CL/cl_platform.h',
> > +'CL/opencl.h',
> > +subdir: 'CL'
> > +  )
> > +endif
> > diff --git a/meson.build b/meson.build
> > index 842d441199e..7892d2d0ec4 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -583,6 +583,20 @@ if with_gallium_st_nine
> >endif
> >  endif
> >  
> > +if get_option('gallium-opencl')
> > +  if not with_gallium
> > +error('OpenCL Clover implementation requires at least one
> > gallium driver.')
> > +  endif
> > +
> > +  # TODO: alitvec?
> > +  dep_clc = dependency('libclc')
> > +  with_gallium_opencl = true
> > +else
> > +  dep_clc = []
> > +  with_gallium_opencl = false
> > +endif
> > +with_opencl_icd = get_option('gallium-opencl-icd')
> > +
> >  gl_pkgconfig_c_flags = []
> >  if with_platform_x11
> >if with_any_vk or (with_glx == 'dri' and with_dri_platform ==
> > 'drm')
> > @@ -930,7 +944,7 @@ dep_thread = dependency('threads')
> >  if dep_thread.found() and host_machine.system() != 'windows'
> >pre_args += '-DHAVE_PTHREAD'
> >  endif
> > -if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 #
> > TODO: clover
> > +if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or
> > with_gallium_opencl
> >dep_elf = dependency('libelf', required : false)
> >if not dep_elf.found()
> >  dep_elf = cc.find_library('elf')
> > @@ -972,12 +986,19 @@ if with_amd_vk or with_gallium_radeonsi or
> > with_gallium_r600
> >  llvm_modules += 'asmparser'
> >endif
> >  endif
> > +if with_gallium_opencl
> > +  llvm_modules += [
> > +'all-targets', 'linker', 'coverage', 'instrumentation', 'ipo',
> > 'irreader',
> > +'lto', 'option', 'objcarcopts', 'profiledata',
> > +  ]
> > +  # TODO: optional modules
> > +endif
> >  
> >  _llvm = get_option('llvm')
> >  if _llvm == 'auto'
> >dep_llvm = dependency(
> >  'llvm', version : '>= 3.9.0', modules : llvm_modules,
> > -required : with_amd_vk or with_gallium_radeonsi or
> > with_gallium_swr,
> > +required : with_amd_vk or with_gallium_radeonsi or
> > with_gallium_swr or with_gallium_opencl,
> >)
> >with_llvm = dep_llvm.found()
> >  elif _llvm == 'true'
> > @@ -1154,8 +1175,6 @@ else
> >dep_lmsensors = []
> >  endif
> >  
> > -# TODO: clover
> > -
> >  # TODO: gallium tests
> >  
> >  # TODO: various libdirs
> > diff --git a/meson_options.txt b/meson_options.txt
> > index 74fbfbe0330..f7320cb6fb0 100644
> > --- a/meson_options.txt
> > +++ b/meson_options.txt
> > @@ -120,6 +120,18 @@ option(
> >value : false,
> >description : 'build gallium "nine" Direct3D 9.x state
> > tracker.',
> >  )
> > +option(
> > +  'gallium-opencl',
> > +  type : 'boolean',
> > +  value : false,
> > +  description : 'build gallium "clover" OpenCL state tracker.',
> > +)
> > +option(
> > +  'gallium-opencl-icd',
> > +  type : 'boolean',
> > +  value : true,
> > +  description : 'Build gallium "clover" as an ICD library.',
> > +)
> >  option(
> >'d3d-drivers-path',
> >type : 'string',
> > diff --git a/src/gallium/meson.build b/src/gallium/meson.build
> > index fc21dcf03e1..6330c7514af 100644
> > --- a/src/gallium/meson.build
> > +++ b/src/gallium/meson.build
> > @@ -145,7 +145,17 @@ endif
> >  if with_gallium_st_nine
> >subdir('state_trackers/nine')
> >  endif
> > -# TODO: clover
> > +if with_gallium_opencl
> > +  # TODO: this isn't really clover speci

Re: [Mesa-dev] [PATCH] gallium/docs: document behavior of set_sample_mask()

2017-12-12 Thread Roland Scheidegger
Am 13.12.2017 um 04:34 schrieb Brian Paul:
> The sample mask is used even if msaa is not explicitly enabled when we
> have a framebuffer with multisampled surfaces.  That's DX behavior and
> what the Radeon drivers do.  Not sure about other drivers at this point.
> ---
>  src/gallium/docs/source/context.rst | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/docs/source/context.rst 
> b/src/gallium/docs/source/context.rst
> index 9d069b3..0ac7340 100644
> --- a/src/gallium/docs/source/context.rst
> +++ b/src/gallium/docs/source/context.rst
> @@ -64,7 +64,10 @@ objects. They all follow simple, one-method binding calls, 
> e.g.
>  * ``set_stencil_ref`` sets the stencil front and back reference values
>which are used as comparison values in stencil test.
>  * ``set_blend_color``
> -* ``set_sample_mask``
> +* ``set_sample_mask``  sets the per-context multisample sample mask.  Note
> +  that this takes effect even if multisampling is not explicitly enabled if
> +  the frambuffer surface(s) are multisampled.  Also, this mask is AND-ed
> +  with the optional fragment shader sample mask output (when emitted).
>  * ``set_min_samples`` sets the minimum number of samples that must be run.
>  * ``set_clip_state``
>  * ``set_polygon_stipple``
> 

Looks alright to me. Though note llvmpipe does this even for
non-multisampled surfaces (as per dx behavior too). But as far as I
know, radeons don't and it would have to be emulated (I suppose the dx10
drivers do), so probably can't really make this gallium behavior (sample
mask is pretty awkward with just one sample in the first place, and I
doubt it is used in practice).

Reviewed-by: Roland Scheidegger 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallivm: fix an issue with NaNs with seamless cube filtering

2017-12-12 Thread Brian Paul
On Tue, Dec 12, 2017 at 7:34 PM,  wrote:

> From: Roland Scheidegger 
>
> Cube texture wrapping is a bit special since the values (post face
> projection) always are within [0,1], so we took advantage of that and
> omitted some clamps.
> However, we can still get NaNs (either because the coords already had NaNs,
> or the face projection generated them), and in fact we didn't handle them
> quite safely. I've seen -INT_MAX + 1 been propagated through as the final
> int
> coord value, albeit I didn't observe a crash. (Not quite a coincidence,
> since
> any stride mul with -INT_MAX or -INT_MAX+1 will turn up as a small positive
> number - nevertheless, I'd rather not try my luck, I'm not entirely sure it
> can't really turn up negative neither due to seamless coord swapping, plus
> ifloor of a NaN is not guaranteed to return -INT_MAX by any standard. And
> we kill off NaNs similarly with ordinary texture wrapping too.)
> So kill off the NaNs by using the common max against zero method.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 11 +++
>  1 file changed, 11 insertions(+)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index 571a968..ff8cbf6 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -1123,6 +1123,17 @@ lp_build_sample_image_linear(struct
> lp_build_sample_context *bld,
> */
>/* should always have normalized coords, and offsets are undefined
> */
>assert(bld->static_sampler_state->normalized_coords);
> +  /*
> +   * The coords should all be between [0,1] however we can have NaNs,
> +   * which will wreak havoc. In particular the y1_clamped value below
> +   * can be -INT_MAX (on x86) and be propagated right through
> (probably
> +   * other values might be bogus in the end too).
> +   * So kill off the NaNs here.
> +   */
> +  coords[0] = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
> +   GALLIVM_NAN_RETURN_OTHER_SECO
> ND_NONNAN);
> +  coords[1] = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
> +   GALLIVM_NAN_RETURN_OTHER_SECO
> ND_NONNAN);
>coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
>/* instead of clamp, build mask if overflowed */
>coord = lp_build_sub(coord_bld, coord, half);
> --
>

Would it make sense to have a gallivm helper function for doing this?  Or
two, for min/max?

In any case, for both patches,
Reviewed-by: Brian Paul 


2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium/docs: document behavior of set_sample_mask()

2017-12-12 Thread Brian Paul
The sample mask is used even if msaa is not explicitly enabled when we
have a framebuffer with multisampled surfaces.  That's DX behavior and
what the Radeon drivers do.  Not sure about other drivers at this point.
---
 src/gallium/docs/source/context.rst | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/docs/source/context.rst 
b/src/gallium/docs/source/context.rst
index 9d069b3..0ac7340 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -64,7 +64,10 @@ objects. They all follow simple, one-method binding calls, 
e.g.
 * ``set_stencil_ref`` sets the stencil front and back reference values
   which are used as comparison values in stencil test.
 * ``set_blend_color``
-* ``set_sample_mask``
+* ``set_sample_mask``  sets the per-context multisample sample mask.  Note
+  that this takes effect even if multisampling is not explicitly enabled if
+  the frambuffer surface(s) are multisampled.  Also, this mask is AND-ed
+  with the optional fragment shader sample mask output (when emitted).
 * ``set_min_samples`` sets the minimum number of samples that must be run.
 * ``set_clip_state``
 * ``set_polygon_stipple``
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallivm: implement accurate corner behavior for textureGather with cube maps

2017-12-12 Thread sroland
From: Roland Scheidegger 

The spec says the missing texel (when we wrap around both x and y axis)
should be synthesized as the average of the 3 other texels. For bilinear
filtering however we instead adjusted the filter weights (because, while
the complexity looks similar, there would be 4 times as many color values
to fix up than weights). Obviously this could not work for gather (hence
accurate corner filtering was disabled with gather).
Implement this by just doing it as the spec implies - calculate the 4th
texel as the average of the other 3. With gather of course there's only
one color to worry about, so it's not all that many instructions either
(albeit surely the whole cube map filtering is hilariously complex).
---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 304 ++
 1 file changed, 201 insertions(+), 103 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index def731e..571a968 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1030,20 +1030,13 @@ lp_build_sample_image_linear(struct 
lp_build_sample_context *bld,
LLVMValueRef neighbors[2][2][4];
int chan, texel_index;
boolean seamless_cube_filter, accurate_cube_corners;
+   unsigned chan_swiz = bld->static_texture_state->swizzle_r;
 
seamless_cube_filter = (bld->static_texture_state->target == 
PIPE_TEXTURE_CUBE ||
bld->static_texture_state->target == 
PIPE_TEXTURE_CUBE_ARRAY) &&
   bld->static_sampler_state->seamless_cube_map;
-   /*
-* XXX I don't know how this is really supposed to work with gather. From GL
-* spec wording (not gather specific) it sounds like the 4th missing texel
-* should be an average of the other 3, hence for gather could return this.
-* This is however NOT how the code here works, which just fixes up the
-* weights used for filtering instead. And of course for gather there is
-* no filter to tweak...
-*/
-   accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
-   !is_gather;
+
+   accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter;
 
lp_build_extract_image_sizes(bld,
 &bld->int_size_bld,
@@ -1371,94 +1364,191 @@ lp_build_sample_image_linear(struct 
lp_build_sample_context *bld,
* as well) here.
*/
   if (accurate_cube_corners) {
- LLVMValueRef w00, w01, w10, w11, wx0, wy0;
- LLVMValueRef c_weight, c00, c01, c10, c11;
- LLVMValueRef have_corner, one_third, tmp;
+ LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f;
+ LLVMValueRef have_corner, one_third;
 
- colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
- colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
- colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
- colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+ colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, 
"cs0");
+ colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, 
"cs1");
+ colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, 
"cs2");
+ colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, 
"cs3");
 
  have_corner = LLVMBuildLoad(builder, have_corners, "");
 
  lp_build_if(&corner_if, bld->gallivm, have_corner);
 
- /*
-  * we can't use standard 2d lerp as we need per-element weight
-  * in case of corners, so just calculate bilinear result as
-  * w00*s00 + w01*s01 + w10*s10 + w11*s11.
-  * (This is actually less work than using 2d lerp, 7 vs. 9 
instructions,
-  * however calculating the weights needs another 6, so actually 
probably
-  * not slower than 2d lerp only for 4 channels as weights only need
-  * to be calculated once - of course fixing the weights has 
additional cost.)
-  */
- wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
- wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
- w00 = lp_build_mul(coord_bld, wx0, wy0);
- w01 = lp_build_mul(coord_bld, s_fpart, wy0);
- w10 = lp_build_mul(coord_bld, wx0, t_fpart);
- w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
-
- /* find corner weight */
+ one_third = lp_build_const_vec(bld->gallivm, coord_bld->type,
+1.0f/3.0f);
+
+ /* find corner */
  c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
- c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
+ c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
  c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
- 

[Mesa-dev] [PATCH 2/2] gallivm: fix an issue with NaNs with seamless cube filtering

2017-12-12 Thread sroland
From: Roland Scheidegger 

Cube texture wrapping is a bit special since the values (post face
projection) always are within [0,1], so we took advantage of that and
omitted some clamps.
However, we can still get NaNs (either because the coords already had NaNs,
or the face projection generated them), and in fact we didn't handle them
quite safely. I've seen -INT_MAX + 1 being propagated through as the final int
coord value, albeit I didn't observe a crash. (Not quite a coincidence, since
any stride mul with -INT_MAX or -INT_MAX+1 will turn up as a small positive
number - nevertheless, I'd rather not try my luck, I'm not entirely sure it
can't really turn up negative either due to seamless coord swapping, plus
ifloor of a NaN is not guaranteed to return -INT_MAX by any standard. And
we kill off NaNs similarly with ordinary texture wrapping too.)
So kill off the NaNs by using the common max against zero method.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 571a968..ff8cbf6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1123,6 +1123,17 @@ lp_build_sample_image_linear(struct 
lp_build_sample_context *bld,
*/
   /* should always have normalized coords, and offsets are undefined */
   assert(bld->static_sampler_state->normalized_coords);
+  /*
+   * The coords should all be between [0,1] however we can have NaNs,
+   * which will wreak havoc. In particular the y1_clamped value below
+   * can be -INT_MAX (on x86) and be propagated right through (probably
+   * other values might be bogus in the end too).
+   * So kill off the NaNs here.
+   */
+  coords[0] = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
+   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+  coords[1] = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
+   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
   coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
   /* instead of clamp, build mask if overflowed */
   coord = lp_build_sub(coord_bld, coord, half);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101223] Mesa fails to build with clang

2017-12-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101223

Mike Lothian  changed:

   What|Removed |Added

 Resolution|--- |INVALID
 CC||m...@fireburn.co.uk
 Status|NEW |RESOLVED

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] intel/decoder: Convert the iterator to work entirely in bits

2017-12-12 Thread Jason Ekstrand
---
 src/intel/common/gen_decoder.c | 17 -
 src/intel/common/gen_decoder.h |  4 +---
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index 9f4f503..de4c506 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -840,9 +840,7 @@ iter_advance_field(struct gen_field_iterator *iter)
 
int group_member_offset = iter_group_offset_bits(iter, iter->group_iter);
 
-   iter->start = group_member_offset + iter->field->start;
-   iter->end = group_member_offset + iter->field->end;
-   iter->dword = iter->start / 32;
+   iter->bit = group_member_offset + iter->field->start;
iter->struct_desc = NULL;
 
return true;
@@ -890,7 +888,7 @@ iter_decode_field(struct gen_field_iterator *iter)
memset(&v, 0, sizeof(v));
 
v.qw = iter_decode_field_raw(iter->field,
-&iter->p[iter->dword], iter->p_end);
+&iter->p[iter->bit / 32], iter->p_end);
 
const char *enum_name = NULL;
 
@@ -1020,17 +1018,18 @@ gen_print_group(FILE *outfile, struct gen_group *group,
 
gen_field_iterator_init(&iter, group, p, color);
do {
-  if (last_dword != iter.dword) {
- for (int i = last_dword + 1; i <= iter.dword; i++)
+  int iter_dword = iter.bit / 32;
+  if (last_dword != iter_dword) {
+ for (int i = last_dword + 1; i <= iter_dword; i++)
 print_dword_header(outfile, &iter, offset, i);
- last_dword = iter.dword;
+ last_dword = iter_dword;
   }
   if (!gen_field_is_header(iter.field)) {
  fprintf(outfile, "%s: %s\n", iter.name, iter.value);
  if (iter.struct_desc) {
-uint64_t struct_offset = offset + 4 * iter.dword;
+uint64_t struct_offset = offset + 4 * iter_dword;
 gen_print_group(outfile, iter.struct_desc, struct_offset,
-&p[iter.dword], color);
+&p[iter_dword], color);
  }
   }
} while (gen_field_iterator_next(&iter));
diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
index bf7cb42..a45e0a2 100644
--- a/src/intel/common/gen_decoder.h
+++ b/src/intel/common/gen_decoder.h
@@ -70,9 +70,7 @@ struct gen_field_iterator {
struct gen_group *struct_desc;
const uint32_t *p;
const uint32_t *p_end;
-   int dword; /**< current field starts at &p[dword] */
-   int start; /**< current field starts at this bit number */
-   int end;   /**< current field ends at this bit number */
+   int bit; /**< current field starts at this bit offset into p */
 
int group_iter;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] intel/decoder: Drop gen_field_decode helper

2017-12-12 Thread Jason Ekstrand
It's unused
---
 src/intel/common/gen_decoder.c | 8 
 src/intel/common/gen_decoder.h | 3 ---
 2 files changed, 11 deletions(-)

diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index b3501ae..9f4f503 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -1011,14 +1011,6 @@ gen_field_is_header(struct gen_field *field)
return (field->parent->opcode_mask & bits) != 0;
 }
 
-void gen_field_decode(struct gen_field *field,
-  const uint32_t *p, const uint32_t *end,
-  union gen_field_value *value)
-{
-   uint32_t dword = field->start / 32;
-   value->u64 = iter_decode_field_raw(field, &p[dword], end);
-}
-
 void
 gen_print_group(FILE *outfile, struct gen_group *group,
 uint64_t offset, const uint32_t *p, bool color)
diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
index 8b00b6e..bf7cb42 100644
--- a/src/intel/common/gen_decoder.h
+++ b/src/intel/common/gen_decoder.h
@@ -62,9 +62,6 @@ struct gen_field *gen_group_find_field(struct gen_group 
*group, const char *name
 struct gen_enum *gen_spec_find_enum(struct gen_spec *spec, const char *name);
 
 bool gen_field_is_header(struct gen_field *field);
-void gen_field_decode(struct gen_field *field,
-  const uint32_t *p, const uint32_t *end,
-  union gen_field_value *value);
 
 struct gen_field_iterator {
struct gen_group *group;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] intel/decoder: Take a bit offset in gen_print_group

2017-12-12 Thread Jason Ekstrand
Previously, if a group was nested in another group such that it didn't
start on a dword boundary, we would decode it as if it started at the
start of its first dword.  This changes things to work even more in
terms of bits so that we can properly decode these structs.  This
affects MOCS, attribute swizzles, and several other things.
---
 src/intel/common/gen_decoder.c| 15 ---
 src/intel/common/gen_decoder.h|  5 +++--
 src/intel/tools/aubinator.c   |  2 +-
 src/intel/tools/aubinator_error_decode.c  | 18 ++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 10 +-
 5 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index 59c76ea..4f373a3 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -851,8 +851,8 @@ iter_decode_field_raw(struct gen_field_iterator *iter)
 {
uint64_t qw = 0;
 
-   int field_start = iter->bit;
-   int field_end = iter->bit + (iter->field->end - iter->field->start);
+   int field_start = iter->p_bit + iter->bit;
+   int field_end = field_start + (iter->field->end - iter->field->start);
 
const uint32_t *p = iter->p + (iter->bit / 32);
if ((field_end - field_start) > 32) {
@@ -959,7 +959,7 @@ iter_decode_field(struct gen_field_iterator *iter)
 void
 gen_field_iterator_init(struct gen_field_iterator *iter,
 struct gen_group *group,
-const uint32_t *p,
+const uint32_t *p, int p_bit,
 bool print_colors)
 {
memset(iter, 0, sizeof(*iter));
@@ -970,6 +970,7 @@ gen_field_iterator_init(struct gen_field_iterator *iter,
else
   iter->field = group->next->fields;
iter->p = p;
+   iter->p_bit = p_bit;
iter->p_end = &p[gen_group_get_length(iter->group, iter->p)];
iter->print_colors = print_colors;
 
@@ -1011,13 +1012,13 @@ gen_field_is_header(struct gen_field *field)
 }
 
 void
-gen_print_group(FILE *outfile, struct gen_group *group,
-uint64_t offset, const uint32_t *p, bool color)
+gen_print_group(FILE *outfile, struct gen_group *group, uint64_t offset,
+const uint32_t *p, int p_bit, bool color)
 {
struct gen_field_iterator iter;
int last_dword = -1;
 
-   gen_field_iterator_init(&iter, group, p, color);
+   gen_field_iterator_init(&iter, group, p, p_bit, color);
do {
   int iter_dword = iter.bit / 32;
   if (last_dword != iter_dword) {
@@ -1030,7 +1031,7 @@ gen_print_group(FILE *outfile, struct gen_group *group,
  if (iter.struct_desc) {
 uint64_t struct_offset = offset + 4 * iter_dword;
 gen_print_group(outfile, iter.struct_desc, struct_offset,
-&p[iter_dword], color);
+&p[iter_dword], iter.bit % 32, color);
  }
   }
} while (gen_field_iterator_next(&iter));
diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
index a45e0a2..37eebd7 100644
--- a/src/intel/common/gen_decoder.h
+++ b/src/intel/common/gen_decoder.h
@@ -69,6 +69,7 @@ struct gen_field_iterator {
char value[128];
struct gen_group *struct_desc;
const uint32_t *p;
+   int p_bit; /**< bit offset into p */
const uint32_t *p_end;
int bit; /**< current field starts at this bit offset into p */
 
@@ -171,14 +172,14 @@ struct gen_field {
 
 void gen_field_iterator_init(struct gen_field_iterator *iter,
  struct gen_group *group,
- const uint32_t *p,
+ const uint32_t *p, int p_bit,
  bool print_colors);
 
 bool gen_field_iterator_next(struct gen_field_iterator *iter);
 
 void gen_print_group(FILE *out,
  struct gen_group *group,
- uint64_t offset, const uint32_t *p,
+ uint64_t offset, const uint32_t *p, int p_bit,
  bool color);
 
 #ifdef __cplusplus
diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 48d4456..45b9566 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -100,7 +100,7 @@ decode_group(struct gen_group *strct, const uint32_t *p, 
int starting_dword)
 {
uint64_t offset = option_print_offsets ? (void *) p - gtt : 0;
 
-   gen_print_group(outfile, strct, offset, p, option_color == COLOR_ALWAYS);
+   gen_print_group(outfile, strct, offset, p, 0, option_color == COLOR_ALWAYS);
 }
 
 static void
diff --git a/src/intel/tools/aubinator_error_decode.c 
b/src/intel/tools/aubinator_error_decode.c
index 91e0845..ecc01b55 100644
--- a/src/intel/tools/aubinator_error_decode.c
+++ b/src/intel/tools/aubinator_error_decode.c
@@ -68,8 +68,10 @@ print_register(struct gen_spec *spec, const char *name, 
uint32_t reg)
 {
struct gen_group *reg_spec = gen_spec_find_register_by_name(spec, name);
 

[Mesa-dev] [PATCH 3/4] intel/decoder: Stop rounding down to the nearest dword

2017-12-12 Thread Jason Ekstrand
---
 src/intel/common/gen_decoder.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index de4c506..59c76ea 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -847,27 +847,29 @@ iter_advance_field(struct gen_field_iterator *iter)
 }
 
 static uint64_t
-iter_decode_field_raw(struct gen_field *field,
-  const uint32_t *p,
-  const uint32_t *end)
+iter_decode_field_raw(struct gen_field_iterator *iter)
 {
uint64_t qw = 0;
 
-   if ((field->end - field->start) > 32) {
-  if ((p + 1) < end)
+   int field_start = iter->bit;
+   int field_end = iter->bit + (iter->field->end - iter->field->start);
+
+   const uint32_t *p = iter->p + (iter->bit / 32);
+   if ((field_end - field_start) > 32) {
+  if ((p + 1) < iter->p_end)
  qw = ((uint64_t) p[1]) << 32;
   qw |= p[0];
} else
   qw = p[0];
 
-   qw = field_value(qw, field->start, field->end);
+   qw = field_value(qw, field_start, field_end);
 
/* Address & offset types have to be aligned to dwords, their start bit is
 * a reminder of the alignment requirement.
 */
-   if (field->type.kind == GEN_TYPE_ADDRESS ||
-   field->type.kind == GEN_TYPE_OFFSET)
-  qw <<= field->start % 32;
+   if (iter->field->type.kind == GEN_TYPE_ADDRESS ||
+   iter->field->type.kind == GEN_TYPE_OFFSET)
+  qw <<= field_start % 32;
 
return qw;
 }
@@ -887,8 +889,7 @@ iter_decode_field(struct gen_field_iterator *iter)
 
memset(&v, 0, sizeof(v));
 
-   v.qw = iter_decode_field_raw(iter->field,
-&iter->p[iter->bit / 32], iter->p_end);
+   v.qw = iter_decode_field_raw(iter);
 
const char *enum_name = NULL;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: fix glx-test race

2017-12-12 Thread Eric Anholt
Dylan Baker  writes:

> This test should rely on dispatch.h being generated, but it doesn't.

Reviewed-by: Eric Anholt 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: don't use fast color clear for small images even on APUs

2017-12-12 Thread Marek Olšák
From: Marek Olšák 

Increase the limit and handle non-square images better.

This makes glxgears 20% faster on APUs, and a little more on dGPUs.
We all use and love glxgears.
---
 src/gallium/drivers/radeonsi/si_clear.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_clear.c 
b/src/gallium/drivers/radeonsi/si_clear.c
index 0ac83f4..464b9d7 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -418,26 +418,25 @@ static void si_do_fast_color_clear(struct si_context 
*sctx,
sctx->b.family == CHIP_STONEY)
tex->num_slow_clears++;
}
 
bool need_decompress_pass = false;
 
/* Use a slow clear for small surfaces where the cost of
 * the eliminate pass can be higher than the benefit of fast
 * clear. The closed driver does this, but the numbers may 
differ.
 *
-* Always use fast clear on APUs.
+* This helps on both dGPUs and APUs, even small APUs like 
Mullins.
 */
-   bool too_small = sctx->screen->info.has_dedicated_vram &&
-tex->resource.b.b.nr_samples <= 1 &&
-tex->resource.b.b.width0 <= 256 &&
-tex->resource.b.b.height0 <= 256;
+   bool too_small = tex->resource.b.b.nr_samples <= 1 &&
+tex->resource.b.b.width0 *
+tex->resource.b.b.height0 <= 512 * 512;
 
/* Try to clear DCC first, otherwise try CMASK. */
if (vi_dcc_enabled(tex, 0)) {
uint32_t reset_value;
bool clear_words_needed;
 
if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
continue;
 
/* This can only occur with MSAA. */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Fix KNOB_MAX_WORKER_THREADS thread creation override.

2017-12-12 Thread Bruce Cherniak
The environment variable KNOB_MAX_WORKER_THREADS allows the user to override
default thread creation and thread binding.  A previous commit that adjusted
the Linux CPU topology caused setting this KNOB to bind all threads to a
single core.

This patch restores the correct functionality of the override.

Cc: 
---
 src/gallium/drivers/swr/rasterizer/core/threads.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp 
b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index f4ddc21226..6242cb3fc7 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -213,8 +213,7 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, 
uint32_t& out_numThread
 {
 for (auto &core : node.cores)
 {
-out_numThreadsPerProcGroup = 
std::max((size_t)out_numThreadsPerProcGroup,
-  core.threadIds.size());
+out_numThreadsPerProcGroup += core.threadIds.size();
 }
 }
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] meson: build clover

2017-12-12 Thread Dylan Baker
Quoting Jan Vesely (2017-12-12 15:00:28)
> I can try to give it a go before my travels start on Thursday. Dylan,
> are there any prerequisites to this series?
> 
> Jan
> 

Just meson 0.44.0, which came out Sunday.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] meson: build clover

2017-12-12 Thread Jan Vesely
On Tue, 2017-12-12 at 12:08 -0800, Francisco Jerez wrote:
> Dylan Baker  writes:
> 
> > This has only been compile tested.
> > 
> > cc: Curro Jerez 
> > Signed-off-by: Dylan Baker 
> > ---
> >  include/meson.build   |  19 
> >  meson.build   |  27 +-
> >  meson_options.txt |  12 +++
> >  src/gallium/meson.build   |  12 ++-
> >  src/gallium/state_trackers/clover/meson.build | 122 
> > ++
> >  src/gallium/targets/opencl/meson.build|  73 +++
> >  src/gallium/targets/pipe-loader/meson.build   |  76 
> >  7 files changed, 336 insertions(+), 5 deletions(-)
> >  create mode 100644 src/gallium/state_trackers/clover/meson.build
> >  create mode 100644 src/gallium/targets/opencl/meson.build
> >  create mode 100644 src/gallium/targets/pipe-loader/meson.build
> > 
> > diff --git a/include/meson.build b/include/meson.build
> > index e4dae91cede..a2e7ce6580e 100644
> > --- a/include/meson.build
> > +++ b/include/meson.build
> > @@ -78,3 +78,22 @@ if with_gallium_st_nine
> >  subdir : 'd3dadapter',
> >)
> >  endif
> > +
> > +# Only install the headers if we are building a stand alone implementation 
> > and
> > +# not an ICD enabled implementation
> > +if with_gallium_opencl and not with_opencl_icd
> > +  install_headers(
> > +'CL/cl.h',
> > +'CL/cl.hpp',
> > +'CL/cl_d3d10.h',
> > +'CL/cl_d3d11.h',
> > +'CL/cl_dx9_media_sharing.h',
> > +'CL/cl_egl.h',
> > +'CL/cl_ext.h',
> > +'CL/cl_gl.h',
> > +'CL/cl_gl_ext.h',
> > +'CL/cl_platform.h',
> > +'CL/opencl.h',
> > +subdir: 'CL'
> > +  )
> > +endif
> > diff --git a/meson.build b/meson.build
> > index 842d441199e..7892d2d0ec4 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -583,6 +583,20 @@ if with_gallium_st_nine
> >endif
> >  endif
> >  
> > +if get_option('gallium-opencl')
> > +  if not with_gallium
> > +error('OpenCL Clover implementation requires at least one gallium 
> > driver.')
> > +  endif
> > +
> > +  # TODO: alitvec?
> > +  dep_clc = dependency('libclc')
> > +  with_gallium_opencl = true
> > +else
> > +  dep_clc = []
> > +  with_gallium_opencl = false
> > +endif
> > +with_opencl_icd = get_option('gallium-opencl-icd')
> > +
> >  gl_pkgconfig_c_flags = []
> >  if with_platform_x11
> >if with_any_vk or (with_glx == 'dri' and with_dri_platform == 'drm')
> > @@ -930,7 +944,7 @@ dep_thread = dependency('threads')
> >  if dep_thread.found() and host_machine.system() != 'windows'
> >pre_args += '-DHAVE_PTHREAD'
> >  endif
> > -if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 # TODO: clover
> > +if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or 
> > with_gallium_opencl
> >dep_elf = dependency('libelf', required : false)
> >if not dep_elf.found()
> >  dep_elf = cc.find_library('elf')
> > @@ -972,12 +986,19 @@ if with_amd_vk or with_gallium_radeonsi or 
> > with_gallium_r600
> >  llvm_modules += 'asmparser'
> >endif
> >  endif
> > +if with_gallium_opencl
> > +  llvm_modules += [
> > +'all-targets', 'linker', 'coverage', 'instrumentation', 'ipo', 
> > 'irreader',
> > +'lto', 'option', 'objcarcopts', 'profiledata',
> > +  ]
> > +  # TODO: optional modules
> > +endif
> >  
> >  _llvm = get_option('llvm')
> >  if _llvm == 'auto'
> >dep_llvm = dependency(
> >  'llvm', version : '>= 3.9.0', modules : llvm_modules,
> > -required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr,
> > +required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or 
> > with_gallium_opencl,
> >)
> >with_llvm = dep_llvm.found()
> >  elif _llvm == 'true'
> > @@ -1154,8 +1175,6 @@ else
> >dep_lmsensors = []
> >  endif
> >  
> > -# TODO: clover
> > -
> >  # TODO: gallium tests
> >  
> >  # TODO: various libdirs
> > diff --git a/meson_options.txt b/meson_options.txt
> > index 74fbfbe0330..f7320cb6fb0 100644
> > --- a/meson_options.txt
> > +++ b/meson_options.txt
> > @@ -120,6 +120,18 @@ option(
> >value : false,
> >description : 'build gallium "nine" Direct3D 9.x state tracker.',
> >  )
> > +option(
> > +  'gallium-opencl',
> > +  type : 'boolean',
> > +  value : false,
> > +  description : 'build gallium "clover" OpenCL state tracker.',
> > +)
> > +option(
> > +  'gallium-opencl-icd',
> > +  type : 'boolean',
> > +  value : true,
> > +  description : 'Build gallium "clover" as an ICD library.',
> > +)
> >  option(
> >'d3d-drivers-path',
> >type : 'string',
> > diff --git a/src/gallium/meson.build b/src/gallium/meson.build
> > index fc21dcf03e1..6330c7514af 100644
> > --- a/src/gallium/meson.build
> > +++ b/src/gallium/meson.build
> > @@ -145,7 +145,17 @@ endif
> >  if with_gallium_st_nine
> >subdir('state_trackers/nine')
> >  endif
> > -# TODO: clover
> > +if with_gallium_opencl
> > +  # TODO: this isn't really clover specific, but ATM clov

[Mesa-dev] [Bug 104233] [needs triage] Crashes XWayland when moving mouse fastly on GNOME application overview

2017-12-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104233

--- Comment #1 from f4954...@opayq.com  ---
Note: if it is not Mesa that is causing this, I am sorry. I am just blindly
guessing here… Any tip would be very much appreciated.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104233] [needs triage] Crashes XWayland when moving mouse fastly on GNOME application overview

2017-12-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104233

Bug ID: 104233
   Summary: [needs triage] Crashes XWayland when moving mouse
fastly on GNOME application overview
   Product: Mesa
   Version: 17.2
  Hardware: Other
OS: All
Status: NEW
  Severity: major
  Priority: medium
 Component: Other
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: f4954...@opayq.com
QA Contact: mesa-dev@lists.freedesktop.org

This needs triage, because I do not really know what causes this.

Here is the crash report:
https://retrace.fedoraproject.org/faf/reports/1911767/
It happened first after the upgrade to Fedora 27.

I use nouveau. On another device using no NVIDIA GPU, I could not reproduce it,
so this could be the cause.

For full STR and more details (including all my speculation) see the report at
https://bugzilla.redhat.com/show_bug.cgi?id=1516466.

$ dnf info mesa-libglapi  
Installierte Pakete
Name : mesa-libglapi
Version  : 17.2.4
Release  : 2.fc27
Arch : x86_64
Größe: 195 k
Quelle   : mesa-17.2.4-2.fc27.src.rpm
Paketquelle  : @System
Aus Paketque : updates
Zusammenfass : Mesa shared glapi
URL  : http://www.mesa3d.org
Lizenz   : MIT
Beschreibung : Mesa shared glapi.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Add glSpecializeShaderARB to common_desktop_functions

2017-12-12 Thread Dylan Baker
I fixed the copy and pasta for idr that included the r-b tag (I copied and
pasted from the git log), and added the bugzilla entry that Vinson opened
locally.

Quoting Dylan Baker (2017-12-12 11:50:30)
> CC: Nicolai Hähnle 
> CC: Reviewed-by: Ian Romanick 
> CC: Mark Janes 
> Fixes: 46b21b8f906 ("mesa: add GL_ARB_gl_spirv boilerplate")
> Signed-off-by: Dylan Baker 
> ---
>  src/mesa/main/tests/dispatch_sanity.cpp | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
> b/src/mesa/main/tests/dispatch_sanity.cpp
> index b2ff35717b7..00754deb461 100644
> --- a/src/mesa/main/tests/dispatch_sanity.cpp
> +++ b/src/mesa/main/tests/dispatch_sanity.cpp
> @@ -1020,6 +1020,9 @@ const struct function 
> common_desktop_functions_possible[] = {
> { "glImportMemoryFdEXT", 45, -1 },
> { "glImportSemaphoreFdEXT", 45, -1 },
>  
> +   /* GL_ARB_gl_spirv */
> +   { "glSpecializeShaderARB", 45, -1 },
> +
> { NULL, 0, -1 }
>  };
>  
> -- 
> 2.15.1
> 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] program: Don't reset SamplersValidated when restoring from shader cache

2017-12-12 Thread Jordan Justen
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103988
Signed-off-by: Jordan Justen 
---
 src/mesa/program/ir_to_mesa.cpp | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 5f663b3d09f..051ed2225b7 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -3115,15 +3115,17 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   link_shaders(ctx, prog);
}
 
-   if (prog->data->LinkStatus) {
-  /* Reset sampler validated to true, validation happens via the
-   * LinkShader call below.
-   */
+   /* If LinkStatus is linking_success, then reset sampler validated to true,
+* validation happens via the LinkShader call below. If LinkStatus is
+* linking_skipped, then SamplersValidated will have been restored from the
+* shader cache.
+*/
+   if (prog->data->LinkStatus == linking_success) {
   prog->SamplersValidated = GL_TRUE;
+   }
 
-  if (!ctx->Driver.LinkShader(ctx, prog)) {
- prog->data->LinkStatus = linking_failure;
-  }
+   if (prog->data->LinkStatus && !ctx->Driver.LinkShader(ctx, prog)) {
+  prog->data->LinkStatus = linking_failure;
}
 
/* Return early if we are loading the shader from on-disk cache */
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radeonsi: don't call force_dcc_off for buffers

2017-12-12 Thread Marek Olšák
From: Marek Olšák 

This was undefined yet harmless behavior in LLVM.
Not anymore - it causes a hang now.

Cc: 17.3 
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 35ada5f..80f986a 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -139,21 +139,21 @@ LLVMValueRef si_load_image_desc(struct si_shader_context 
*ctx,
 LLVMConstInt(ctx->i32, 2, 0), "");
index = LLVMBuildAdd(builder, index,
 ctx->i32_1, "");
list = LLVMBuildPointerCast(builder, list,
si_const_array(ctx->v4i32, 0), "");
} else {
assert(desc_type == AC_DESC_IMAGE);
}
 
rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
-   if (dcc_off)
+   if (desc_type == AC_DESC_IMAGE && dcc_off)
rsrc = force_dcc_off(ctx, rsrc);
return rsrc;
 }
 
 /**
  * Load the resource descriptor for \p image.
  */
 static void
 image_fetch_rsrc(
struct lp_build_tgsi_context *bld_base,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radeonsi: set PNT_SPRITE_ENA = point_quad_rasterization

2017-12-12 Thread Marek Olšák
From: Marek Olšák 

This is based on how nvc0 translates the state.
---
 src/gallium/drivers/radeonsi/si_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 5c4c383..7cf3896 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -889,21 +889,21 @@ static void *si_create_rs_state(struct pipe_context *ctx,

S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
rs->pa_cl_clip_cntl =
S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
 
si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
S_0286D4_FLAT_SHADE_ENA(1) |
-   S_0286D4_PNT_SPRITE_ENA(1) |
+   S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) |
S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != 
PIPE_SPRITE_COORD_UPPER_LEFT));
 
/* point size 12.4 fixed point */
tmp = (unsigned)(state->point_size * 8.0);
si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | 
S_028A00_WIDTH(tmp));
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] gallium/util: add util_num_layers helper

2017-12-12 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/auxiliary/util/u_blitter.c| 4 ++--
 src/gallium/auxiliary/util/u_gen_mipmap.c | 4 ++--
 src/gallium/auxiliary/util/u_inlines.h| 8 +++-
 src/gallium/drivers/r600/r600_texture.c   | 8 
 src/gallium/drivers/radeon/r600_texture.c | 8 
 src/gallium/drivers/radeonsi/si_clear.c   | 2 +-
 src/gallium/drivers/rbug/rbug_core.c  | 2 +-
 7 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blitter.c 
b/src/gallium/auxiliary/util/u_blitter.c
index 476ef08..c7cc11d 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -2070,22 +2070,22 @@ void util_blitter_generate_mipmap(struct 
blitter_context *blitter,
   struct pipe_box dstbox = {0}, srcbox = {0};
   unsigned dst_level = src_level + 1;
 
   dstbox.width = u_minify(tex->width0, dst_level);
   dstbox.height = u_minify(tex->height0, dst_level);
 
   srcbox.width = u_minify(tex->width0, src_level);
   srcbox.height = u_minify(tex->height0, src_level);
 
   if (target == PIPE_TEXTURE_3D) {
- dstbox.depth = util_max_layer(tex, dst_level) + 1;
- srcbox.depth = util_max_layer(tex, src_level) + 1;
+ dstbox.depth = util_num_layers(tex, dst_level);
+ srcbox.depth = util_num_layers(tex, src_level);
   } else {
  dstbox.z = srcbox.z = first_layer;
  dstbox.depth = srcbox.depth = last_layer - first_layer + 1;
   }
 
   /* Initialize the surface. */
   util_blitter_default_dst_texture(&dst_templ, tex, dst_level,
first_layer);
   dst_templ.format = format;
   dst_view = pipe->create_surface(pipe, tex, &dst_templ);
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c 
b/src/gallium/auxiliary/util/u_gen_mipmap.c
index aa8eaeb..3c55d9f 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -106,22 +106,22 @@ util_gen_mipmap(struct pipe_context *pipe, struct 
pipe_resource *pt,
 
   blit.src.box.width = u_minify(pt->width0, blit.src.level);
   blit.src.box.height = u_minify(pt->height0, blit.src.level);
 
   blit.dst.box.width = u_minify(pt->width0, blit.dst.level);
   blit.dst.box.height = u_minify(pt->height0, blit.dst.level);
 
   if (pt->target == PIPE_TEXTURE_3D) {
  /* generate all layers/slices at once */
  blit.src.box.z = blit.dst.box.z = 0;
- blit.src.box.depth = util_max_layer(pt, blit.src.level)+1;
- blit.dst.box.depth = util_max_layer(pt, blit.dst.level)+1;
+ blit.src.box.depth = util_num_layers(pt, blit.src.level);
+ blit.dst.box.depth = util_num_layers(pt, blit.dst.level);
   }
   else {
  blit.src.box.z = blit.dst.box.z = first_layer;
  blit.src.box.depth = blit.dst.box.depth =
 (last_layer + 1 - first_layer);
   }
 
   pipe->blit(pipe, &blit);
}
return TRUE;
diff --git a/src/gallium/auxiliary/util/u_inlines.h 
b/src/gallium/auxiliary/util/u_inlines.h
index 790352d..4ba6ad7 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -659,27 +659,33 @@ util_max_layer(const struct pipe_resource *r, unsigned 
level)
   /* fall-through */
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_CUBE_ARRAY:
   return r->array_size - 1;
default:
   return 0;
}
 }
 
+static inline unsigned
+util_num_layers(const struct pipe_resource *r, unsigned level)
+{
+   return util_max_layer(r, level) + 1;
+}
+
 static inline bool
 util_texrange_covers_whole_level(const struct pipe_resource *tex,
  unsigned level, unsigned x, unsigned y,
  unsigned z, unsigned width,
  unsigned height, unsigned depth)
 {
return x == 0 && y == 0 && z == 0 &&
   width == u_minify(tex->width0, level) &&
   height == u_minify(tex->height0, level) &&
-  depth == util_max_layer(tex, level) + 1;
+  depth == util_num_layers(tex, level);
 }
 
 #ifdef __cplusplus
 }
 #endif
 
 #endif /* U_INLINES_H */
diff --git a/src/gallium/drivers/r600/r600_texture.c 
b/src/gallium/drivers/r600/r600_texture.c
index 3895d53..03cdcd2 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -386,21 +386,21 @@ static void r600_reallocate_texture_inplace(struct 
r600_common_context *rctx,
if (!new_tex)
return;
 
/* Copy the pixels to the new texture. */
if (!invalidate_storage) {
for (i = 0; i <= templ.last_level; i++) {
struct pipe_box box;
 
u_box_3d(0, 0, 0,
 u_minify(templ.width0, i), 
u_minify(templ.height0, i),
-util_max_layer(&templ, i) + 1

[Mesa-dev] [Bug 104231] DispatchSanity_test.GL30 regression

2017-12-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104231

--- Comment #1 from Dylan Baker  ---
I submitted a patch for this already:
https://patchwork.freedesktop.org/patch/192994/

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/3] GEM BO padding to find OOB buffer writes

2017-12-12 Thread Jason Ekstrand
I really like this in principle.  I left comments on patches 2 and 3 which
I think make things better.  My comments on 2 are a bit on the bikeshed
side, but I think reasonable.

On Fri, Dec 8, 2017 at 2:54 AM,  wrote:

> From: Kevin Rogovin 
>
> This patch series adds a new debug option to pad each GEM BO
> allocated by the brw_bufmgr with random noise values which
> are then checked after each batchbuffer dispatch to the kernel.
> This can be quite valuable for finding difficult-to-track-down,
> Heisenbug-style bugs.
>
> A possible follow-up series would be to write to stderr (or
> another logging mechanism) if the OOB write is to a GEM BO that
> backs a GL buffer object; that feature would be quite useful for
> application developers.
>
> Kevin Rogovin (3):
>   intel/common:add debug flag for adding and checking padding on BO's
>   i965: add noise padding to buffer object and function to check if
> noise is correct
>   i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for
> each bo used in batchbuffer is correct
>
>  src/intel/common/gen_debug.c  |  1 +
>  src/intel/common/gen_debug.h  |  1 +
>  src/mesa/drivers/dri/i965/brw_bufmgr.c| 68
> ++-
>  src/mesa/drivers/dri/i965/brw_bufmgr.h| 12 +
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 15 ++
>  5 files changed, 96 insertions(+), 1 deletion(-)
>
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for each bo used in batchbuffer is correct

2017-12-12 Thread Jason Ekstrand
On Fri, Dec 8, 2017 at 2:54 AM,  wrote:

> From: Kevin Rogovin 
>
> Signed-off-by: Kevin Rogovin 
> ---
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 15 +++
>  1 file changed, 15 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 91a6506..549ea3e 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -758,6 +758,7 @@ execbuffer(int fd,
> };
>
> unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;
> +   bool detected_out_of_bounds_write = false;
>
> if (in_fence != -1) {
>execbuf.rsvd2 = in_fence;
> @@ -787,6 +788,20 @@ execbuffer(int fd,
>   batch->validation_list[i].offset);
>   bo->gtt_offset = batch->validation_list[i].offset;
>}
> +
> +  if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
> + if (!brw_bo_padding_is_good(bo)) {
> +detected_out_of_bounds_write = true;
> +fprintf(stderr,
> +"Detected buffer out-of-bounds write from brw_bo %p "
> +"(GEM %u, label = \"%s\")\n",
> +bo, bo->gem_handle, bo->name);
> + }
> +  }
> +   }
>

I think you want to do this at the end of submit_batch instead and add a
brw_bo_wait_rendering on the batch.  Otherwise, your bounds checking is
racing with the GPU.


> +
> +   if (unlikely(detected_out_of_bounds_write)) {
> +  abort();
> }
>
> if (ret == 0 && out_fence != NULL)
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: Don't advertise VK_EXT_debug_report.

2017-12-12 Thread Bas Nieuwenhuizen
We never supported it. Missed during copy and pasting.

Fixes: 17201a2eb0b "radv: port to using updated anv entrypoint/extension 
generator."
---
 src/amd/vulkan/radv_extensions.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 493d1ed7749..2c1c71ecdc7 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -78,7 +78,6 @@ EXTENSIONS = [
 Extension('VK_KHR_xcb_surface',   6, 
'VK_USE_PLATFORM_XCB_KHR'),
 Extension('VK_KHR_xlib_surface',  6, 
'VK_USE_PLATFORM_XLIB_KHR'),
 Extension('VK_KHX_multiview', 1, True),
-Extension('VK_EXT_debug_report',  8, True),
 Extension('VK_EXT_external_memory_dma_buf',   1, True),
 Extension('VK_EXT_global_priority',   1, 
'device->rad_info.has_ctx_priority'),
 Extension('VK_AMD_draw_indirect_count',   1, True),
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104231] DispatchSanity_test.GL30 regression

2017-12-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104231

Bug ID: 104231
   Summary: DispatchSanity_test.GL30 regression
   Product: Mesa
   Version: git
  Hardware: x86-64 (AMD64)
OS: All
Status: NEW
  Keywords: regression
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: v...@freedesktop.org
QA Contact: mesa-dev@lists.freedesktop.org

mesa: acab52f5201683ec3f3698d25045ed1441ecdd14 (master 17.4.0-devel)

===
   Mesa 17.4.0-devel: src/mesa/main/tests/test-suite.log
===

# TOTAL: 1
# PASS:  0
# SKIP:  0
# XFAIL: 0
# FAIL:  1
# XPASS: 0
# ERROR: 0

.. contents:: :depth: 2

FAIL: main-test
===

Running main() from gtest_main.cc
[==] Running 12 tests from 5 test cases.
[--] Global test environment set-up.
[--] 2 tests from EnumStrings
[ RUN  ] EnumStrings.LookUpByNumber
[   OK ] EnumStrings.LookUpByNumber (0 ms)
[ RUN  ] EnumStrings.LookUpUnknownNumber
[   OK ] EnumStrings.LookUpUnknownNumber (0 ms)
[--] 2 tests from EnumStrings (0 ms total)

[--] 6 tests from DispatchSanity_test
[ RUN  ] DispatchSanity_test.GL31_CORE
[   OK ] DispatchSanity_test.GL31_CORE (2 ms)
[ RUN  ] DispatchSanity_test.GL30
dispatch_sanity.cpp:174: Failure
  Expected: nop_table[i]
  Which is: 0x557fbef96990
To be equal to: table[i]
  Which is: 0x557fbf173680
i = 1069 (SpecializeShader)
[  FAILED  ] DispatchSanity_test.GL30 (0 ms)
[ RUN  ] DispatchSanity_test.GLES11
[   OK ] DispatchSanity_test.GLES11 (0 ms)
[ RUN  ] DispatchSanity_test.GLES2
[   OK ] DispatchSanity_test.GLES2 (0 ms)
[ RUN  ] DispatchSanity_test.GLES3
[   OK ] DispatchSanity_test.GLES3 (1 ms)
[ RUN  ] DispatchSanity_test.GLES31
[   OK ] DispatchSanity_test.GLES31 (0 ms)
[--] 6 tests from DispatchSanity_test (3 ms total)

[--] 2 tests from MesaFormatsTest
[ RUN  ] MesaFormatsTest.FormatTypeAndComps
[   OK ] MesaFormatsTest.FormatTypeAndComps (0 ms)
[ RUN  ] MesaFormatsTest.FormatSanity
[   OK ] MesaFormatsTest.FormatSanity (0 ms)
[--] 2 tests from MesaFormatsTest (0 ms total)

[--] 1 test from MesaExtensionsTest
[ RUN  ] MesaExtensionsTest.AlphabeticallySorted
[   OK ] MesaExtensionsTest.AlphabeticallySorted (0 ms)
[--] 1 test from MesaExtensionsTest (0 ms total)

[--] 1 test from program_state_string
[ RUN  ] program_state_string.depth_range
[   OK ] program_state_string.depth_range (0 ms)
[--] 1 test from program_state_string (0 ms total)

[--] Global test environment tear-down
[==] 12 tests from 5 test cases ran. (3 ms total)
[  PASSED  ] 11 tests.
[  FAILED  ] 1 test, listed below:
[  FAILED  ] DispatchSanity_test.GL30

 1 FAILED TEST
FAIL main-test (exit status: 1)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: compute scratch space size correctly for Gen9

2017-12-12 Thread Kenneth Graunke
On Tuesday, December 12, 2017 12:19:16 PM PST Rogovin, Kevin wrote:
> Glad that this helped. The main lead for fixing the bug I got from
> using the patch series posted earlier this week " GEM BO padding to
> find OOB buffer writes" (URL:
> https://lists.freedesktop.org/archives/mesa-dev/2017-December/179658.html).
> I am hoping that that patch series can get reviewed and land in Mesa
> so that hunting for certain classes of Heisenberg bugs can be less
> Heisenberg-like.
> 
> Best Regards,
> -Kevin

Yeah, I like the idea - it's on my list of things to look at.

--Ken


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965: add noise padding to buffer object and function to check if noise is correct

2017-12-12 Thread Jason Ekstrand
I can't help but think that this could be a bit simpler and involve
throwing fewer pointers around.

On Fri, Dec 8, 2017 at 2:54 AM,  wrote:

> From: Kevin Rogovin 
>
> Signed-off-by: Kevin Rogovin 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c | 68
> +-
>  src/mesa/drivers/dri/i965/brw_bufmgr.h | 12 ++
>  2 files changed, 79 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 52b5bf9..7167165 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -367,7 +367,14 @@ retry:
>bo->size = bo_size;
>bo->idle = true;
>
> -  struct drm_i915_gem_create create = { .size = bo_size };
> +  bo->padding.size = 0;
> +  bo->padding.value = NULL;
> +  bo->padding.tmp = NULL;
> +  if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
> + bo->padding.size = getpagesize();
>

This is 4096.  I think we could just have a single uint32_t padding field
which is either 0 or 4096 (More on that later).


> +  }
> +
> +  struct drm_i915_gem_create create = { .size = bo_size +
> bo->padding.size };
>
>/* All new BOs we get from the kernel are zeroed, so we don't need
> to
> * worry about that here.
> @@ -378,6 +385,31 @@ retry:
>   goto err;
>}
>
> +  if (unlikely(bo->padding.size > 0)) {
> + struct drm_i915_gem_pwrite pwrite;
> +
> + bo->padding.value = calloc(bo->padding.size, 1);
> + bo->padding.tmp = calloc(bo->padding.size, 1);
> + if (!bo->padding.value || !bo->padding.tmp) {
> +goto err_free;
> + }
> +
> + for (uint32_t i = 0; i < bo->padding.size; ++i) {
> +bo->padding.value[i] = rand() & 0xFF;
>

Does using rand() really help us?  Why not just come up with some hash-like
thing which generates consistent pseudo-random data?  How about something
like "value[i] = i * 853 + 193"  (some random primes)?  That would mean
that we can generate the data and check it without having to store it in a
per-bo temporary.  If you want it to be magic per-bo, you could also seed
it somehow with the bo handle (just add handle * 607).

If we always allocate 4096B of padding, then you don't need to heap
allocate it and can just put it on the stack for the purpose of interacting
with pread/pwrite.  It's a bit big but still perfectly reasonable.  If a
day came when we wanted to make the padding size adjustable, it would still
probably be reasonable to make the heap allocations temporary so we have
less random stuff in the BO we have to cleanup.


> + }
> +
> + pwrite.handle = create.handle;
> + pwrite.pad = 0;
> + pwrite.offset = bo_size;
> + pwrite.size = bo->padding.size;
> + pwrite.data_ptr = (__u64) (uintptr_t) bo->padding.value;
> + ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
>

There's a part of me that wants to kill pread/write.  However, I think you
may have come up with the only good use of it I've ever seen. :-)


> +
> + if (ret != 0) {
> +goto err_free;
> + }
> +  }
> +
>bo->gem_handle = create.handle;
>
>bo->bufmgr = bufmgr;
> @@ -424,6 +456,26 @@ err:
> return NULL;
>  }
>
> +bool
> +brw_bo_padding_is_good(struct brw_bo *bo)
> +{
> +   if (bo->padding.size > 0) {
> +  struct drm_i915_gem_pread pread;
> +  int ret;
> +
> +  assert(bo->padding.tmp && bo->padding.value);
> +  pread.handle = bo->gem_handle;
> +  pread.pad = 0;
> +  pread.offset = bo->size;
> +  pread.size = bo->padding.size;
> +  pread.data_ptr = (__u64) (uintptr_t) bo->padding.tmp;
> +  ret = drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
> +  assert(ret == 0);
> +  return memcmp(bo->padding.tmp, bo->padding.value, bo->padding.size)
> == 0;
> +   }
> +   return true;
> +}
> +
>  struct brw_bo *
>  brw_bo_alloc(struct brw_bufmgr *bufmgr,
>   const char *name, uint64_t size, uint64_t alignment)
> @@ -598,6 +650,17 @@ bo_free(struct brw_bo *bo)
>DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
>bo->gem_handle, bo->name, strerror(errno));
> }
> +
> +   if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
> +  if (bo->padding.value) {
> + free(bo->padding.value);
>

free will happily ignore NULL pointers.  The check is redundant.


> +  }
> +
> +  if (bo->padding.tmp) {
> + free(bo->padding.tmp);
> +  }
>

If we still keep these heap allocations, deleting them should be keyed off
of bo->padding.size or nothing at all.


> +   }
> +
> free(bo);
>  }
>
> @@ -1156,6 +1219,9 @@ brw_bo_gem_create_from_prime(struct brw_bufmgr
> *bufmgr, int prime_fd)
> bo->name = "prime";
> bo->reusable = false;
> bo->external = true;
> +   bo->padding.size = 0;
> +   bo->padding.value = NULL;
> +   bo->padd

Re: [Mesa-dev] [PATCH 4/4] meson: build clover

2017-12-12 Thread Francisco Jerez
Dylan Baker  writes:

> This has only been compile tested.
>
> cc: Curro Jerez 
> Signed-off-by: Dylan Baker 
> ---
>  include/meson.build   |  19 
>  meson.build   |  27 +-
>  meson_options.txt |  12 +++
>  src/gallium/meson.build   |  12 ++-
>  src/gallium/state_trackers/clover/meson.build | 122 
> ++
>  src/gallium/targets/opencl/meson.build|  73 +++
>  src/gallium/targets/pipe-loader/meson.build   |  76 
>  7 files changed, 336 insertions(+), 5 deletions(-)
>  create mode 100644 src/gallium/state_trackers/clover/meson.build
>  create mode 100644 src/gallium/targets/opencl/meson.build
>  create mode 100644 src/gallium/targets/pipe-loader/meson.build
>
> diff --git a/include/meson.build b/include/meson.build
> index e4dae91cede..a2e7ce6580e 100644
> --- a/include/meson.build
> +++ b/include/meson.build
> @@ -78,3 +78,22 @@ if with_gallium_st_nine
>  subdir : 'd3dadapter',
>)
>  endif
> +
> +# Only install the headers if we are building a stand alone implementation 
> and
> +# not an ICD enabled implementation
> +if with_gallium_opencl and not with_opencl_icd
> +  install_headers(
> +'CL/cl.h',
> +'CL/cl.hpp',
> +'CL/cl_d3d10.h',
> +'CL/cl_d3d11.h',
> +'CL/cl_dx9_media_sharing.h',
> +'CL/cl_egl.h',
> +'CL/cl_ext.h',
> +'CL/cl_gl.h',
> +'CL/cl_gl_ext.h',
> +'CL/cl_platform.h',
> +'CL/opencl.h',
> +subdir: 'CL'
> +  )
> +endif
> diff --git a/meson.build b/meson.build
> index 842d441199e..7892d2d0ec4 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -583,6 +583,20 @@ if with_gallium_st_nine
>endif
>  endif
>  
> +if get_option('gallium-opencl')
> +  if not with_gallium
> +error('OpenCL Clover implementation requires at least one gallium 
> driver.')
> +  endif
> +
> +  # TODO: alitvec?
> +  dep_clc = dependency('libclc')
> +  with_gallium_opencl = true
> +else
> +  dep_clc = []
> +  with_gallium_opencl = false
> +endif
> +with_opencl_icd = get_option('gallium-opencl-icd')
> +
>  gl_pkgconfig_c_flags = []
>  if with_platform_x11
>if with_any_vk or (with_glx == 'dri' and with_dri_platform == 'drm')
> @@ -930,7 +944,7 @@ dep_thread = dependency('threads')
>  if dep_thread.found() and host_machine.system() != 'windows'
>pre_args += '-DHAVE_PTHREAD'
>  endif
> -if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 # TODO: clover
> +if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or 
> with_gallium_opencl
>dep_elf = dependency('libelf', required : false)
>if not dep_elf.found()
>  dep_elf = cc.find_library('elf')
> @@ -972,12 +986,19 @@ if with_amd_vk or with_gallium_radeonsi or 
> with_gallium_r600
>  llvm_modules += 'asmparser'
>endif
>  endif
> +if with_gallium_opencl
> +  llvm_modules += [
> +'all-targets', 'linker', 'coverage', 'instrumentation', 'ipo', 
> 'irreader',
> +'lto', 'option', 'objcarcopts', 'profiledata',
> +  ]
> +  # TODO: optional modules
> +endif
>  
>  _llvm = get_option('llvm')
>  if _llvm == 'auto'
>dep_llvm = dependency(
>  'llvm', version : '>= 3.9.0', modules : llvm_modules,
> -required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr,
> +required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or 
> with_gallium_opencl,
>)
>with_llvm = dep_llvm.found()
>  elif _llvm == 'true'
> @@ -1154,8 +1175,6 @@ else
>dep_lmsensors = []
>  endif
>  
> -# TODO: clover
> -
>  # TODO: gallium tests
>  
>  # TODO: various libdirs
> diff --git a/meson_options.txt b/meson_options.txt
> index 74fbfbe0330..f7320cb6fb0 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -120,6 +120,18 @@ option(
>value : false,
>description : 'build gallium "nine" Direct3D 9.x state tracker.',
>  )
> +option(
> +  'gallium-opencl',
> +  type : 'boolean',
> +  value : false,
> +  description : 'build gallium "clover" OpenCL state tracker.',
> +)
> +option(
> +  'gallium-opencl-icd',
> +  type : 'boolean',
> +  value : true,
> +  description : 'Build gallium "clover" as an ICD library.',
> +)
>  option(
>'d3d-drivers-path',
>type : 'string',
> diff --git a/src/gallium/meson.build b/src/gallium/meson.build
> index fc21dcf03e1..6330c7514af 100644
> --- a/src/gallium/meson.build
> +++ b/src/gallium/meson.build
> @@ -145,7 +145,17 @@ endif
>  if with_gallium_st_nine
>subdir('state_trackers/nine')
>  endif
> -# TODO: clover
> +if with_gallium_opencl
> +  # TODO: this isn't really clover specific, but ATM clover is the only
> +  # consumer
> +  subdir('targets/pipe-loader')
> +
> +  if meson.version().version_compare('< 0.44.0')
> +error('OpenCL requires meson 0.44.0 or greater.')
> +  endif
> +  subdir('state_trackers/clover')
> +  subdir('targets/opencl')
> +endif
>  if with_dri
>subdir('state_trackers/dri')
>subdir('targets/dri')
> diff --git a/src/g

Re: [Mesa-dev] [PATCH 2/2] i965: compute scratch space size correctly for Gen9

2017-12-12 Thread Rogovin, Kevin
Glad that this helped. The main lead for fixing the bug I got from using the 
patch series posted earlier this week " GEM BO padding to find OOB buffer 
writes" (URL: 
https://lists.freedesktop.org/archives/mesa-dev/2017-December/179658.html). I 
am hoping that that patch series can get reviewed and land in Mesa so that 
hunting for certain classes of Heisenberg bugs can be less Heisenberg-like.

Best Regards,
-Kevin

-Original Message-
From: Kenneth Graunke [mailto:kenn...@whitecape.org] 
Sent: Tuesday, December 12, 2017 9:09 PM
To: mesa-dev@lists.freedesktop.org
Cc: Rogovin, Kevin 
Subject: Re: [Mesa-dev] [PATCH 2/2] i965: compute scratch space size correctly 
for Gen9

On Tuesday, December 12, 2017 4:17:27 AM PST kevin.rogo...@intel.com wrote:
> From: Kevin Rogovin 
> 
> Signed-off-by: Kevin Rogovin 
> ---
>  src/mesa/drivers/dri/i965/brw_program.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
> b/src/mesa/drivers/dri/i965/brw_program.c
> index 6aa4100..1ae0aa0 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -368,9 +368,13 @@ brw_alloc_stage_scratch(struct brw_context *brw,
> *
> * According to the other driver team, this applies to compute shaders
> * as well.  This is not currently documented at all.
> +   *
> +   * brw->screen->subslice_total is the TOTAL number of subslices
> +   * and we wish to view that there are 4 subslices per slice
> +   * instead of the actual number of subslices per slice.
> */
>if (devinfo->gen >= 9)
> - subslices = 4;
> + subslices = 4 * brw->screen->devinfo.num_slices;
>  
>/* WaCSScratchSize:hsw
> *
> 

Thank you!  I'd meant to clean up the nonsense in patch 1 a while ago, but I 
guess I got distracted.  Good catch on the bug, too...

First is R-b, and this one gets:

Fixes: 8ecdbb61360 "i965: Pretend there are 4 subslices for compute shader 
threads on Gen9+."
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104005
Reviewed-by: Kenneth Graunke 

Both are now pushed:

To ssh://git.freedesktop.org/git/mesa/mesa
   7469966ed2a..b1ce812c514  master -> master

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa] meson: add dep_thread to every lib that includes threads.h

2017-12-12 Thread Dylan Baker
Quoting Emil Velikov (2017-12-12 07:04:09)
> On 11 December 2017 at 22:22, Dylan Baker  wrote:
> > Quoting Emil Velikov (2017-12-11 12:06:35)
> >> On 7 December 2017 at 17:25, Dylan Baker  wrote:
> >> > Quoting Emil Velikov (2017-12-07 08:40:27)
> >> >> On 7 December 2017 at 14:51, Eric Engestrom  
> >> >> wrote:
> >> >> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104141
> >> >> > Signed-off-by: Eric Engestrom 
> >> >> > ---
> >> >> >  src/broadcom/meson.build| 2 +-
> >> >> >  src/gallium/auxiliary/meson.build   | 2 +-
> >> >> >  src/gallium/state_trackers/nine/meson.build | 1 +
> >> >> >  src/gallium/targets/xa/meson.build  | 2 +-
> >> >> >  src/gallium/targets/xvmc/meson.build| 2 +-
> >> >> >  src/gbm/meson.build | 2 +-
> >> >> >  src/intel/common/meson.build| 2 +-
> >> >> >  src/loader/meson.build  | 2 +-
> >> >> >  src/util/meson.build| 2 +-
> >> >> >  9 files changed, 9 insertions(+), 8 deletions(-)
> >> >> >
> >> >> I doubt we can continue and pretend to be libpthread.so free.
> >> >> To make it even funnier, depending on moon cycle or other fun factors,
> >> >> we could get the pthread dependency implicitly satisfied as one of the
> >> >> other shared libraries already pulls the library.
> >> >>
> >> >> So how about we simply append -pthread to CC/CXX with at global scope
> >> >> and drop the all the individual dependencies?
> >> >> It will safe us a few characters to type, plus will ensure that newly
> >> >> added binaries don't fall victim of the same issue.
> >> >
> >> > Absolutely not. The meson build has dep_thread for a reason, because 
> >> > meson
> >> > guarantees that calling `dependency('threads')` will always return the 
> >> > right
> >> > value for your platform, even if that platform is windows and doesn't 
> >> > have
> >> > pthreads at all (but does the right thing for cygwin).
> >> >
> >> I would recommend looking through clang/gcc. AFAICS any* platform/arch
> >> combo supported by Mesa handles -pthread and that toggle does the
> >> "right thing".
> >> Obviously that can seem a bit hacky, so a better way to avoid all the
> >> copy/paste is for meson to grow an option that allows folding the
> >> required cflags/libs with the compiler directive.
> >
> > That's all fine, but the meson build is planning on supporting haiku and 
> > plain
> > windows (with msvc), neither of which have pthreads (haiku does, but it's 
> > not a
> > standalone library and you don't pass -pthreads to the compiler or linker 
> > and
> > it's an error to do so). macOS clang also warns when passing -pthreads to 
> > the
> > linker (but only the one shipped with xcode), not if you build clang 
> > yourself.
> >
> > If you feel strongly about it, open a bug upstream and discuss it with 
> > upstream.
> > If they agree and add a mechanism to do so I'd be fine using it.
> >
> >> > The reason that we're running into this problem is as you guessed that 
> >> > some
> >> > dependencies pull in pthreads implicitly, for example LLVM, which is why 
> >> > we're
> >> > seeing this so often in gallium.
> >> >
> >> Precisely. Due to the combinatoric explosions things are bound to
> >> break again, hence my earlier suggestion.
> >> I doubt you or anyone on the team will be excited to see things break.
> >
> > That's possible, obviously. I also think these sort of issues will work
> > themselves out fairly quickly, while I'm very concerned adding -pthread 
> > into the
> > list of arguments we pass unconditionally is going to break whole platforms 
> > in
> > subtle and hard to fix ways, and really goes against the philosophy of 
> > meson,
> > which is to solve these sort of problems in meson itself, rather than each 
> > build
> > system solving them again and again, usually incorrectly.
> >
> > If we want to trot out the big hammer, I'd be happier just to add 
> > dep_thread to
> > every shared library and binary than trying to add the right combination of
> > -pthreads and -lpthreads for each platform ourselves to the C and C++ flags.
> >
> > There's about 350 uses of pthread symbols in mesa itself, of that there are 
> > 56
> > unique files containing pthread symbols (some of which are generators), and 
> > of
> > that there are only 23 unique folders containing pthread symbols. I think 
> > that
> > getting this right is very doable.
> >
> > I'll start auditing the meson build to see if there's any place that we're
> > missing passing pthreads directly.
> >
> Guess I should have made it more obvious:
> 
> I'm trying to save you (amongst others) the annoyance as things break
> - since they will break :-(
> It's entirely up-to you to decide on the best approach to mitigate or
> even avoid that.
> 
> HTH
> Emil

I do appreciate it, I know you've dealt with a number of these problems already
with the other build systems, and a number of the suggestions that you've made
are very good 

Re: [Mesa-dev] [PATCH] intel/fs/bank_conflicts: Don't touch Gen7 MRF hack registers.

2017-12-12 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] meson: set opencl flags for r600

2017-12-12 Thread Dylan Baker
Quoting Eric Engestrom (2017-12-12 06:15:58)
> On Monday, 2017-12-11 11:50:01 -0800, Dylan Baker wrote:
> > Quoting Eric Engestrom (2017-12-11 07:55:30)
> > > On Friday, 2017-12-08 16:27:21 -0800, Dylan Baker wrote:
> > > > Signed-off-by: Dylan Baker 
> > > 
> > > Should come after the current 4/4, but
> > > Reviewed-by: Eric Engestrom 
> > > 
> > > > ---
> > > >  src/gallium/drivers/r600/meson.build | 7 +--
> > > >  1 file changed, 5 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/src/gallium/drivers/r600/meson.build 
> > > > b/src/gallium/drivers/r600/meson.build
> > > > index 2132dbb33ad..5899518a2e8 100644
> > > > --- a/src/gallium/drivers/r600/meson.build
> > > > +++ b/src/gallium/drivers/r600/meson.build
> > > > @@ -113,12 +113,15 @@ egd_tables_h = custom_target(
> > > >capture : true,
> > > >  )
> > > >  
> > > > -# TODO: compute defines
> > > > +r600_c_args = []
> > > > +if with_gallium_opencl
> > > > +  r600_c_args += '-DHAVE_OPENCL'
> > > > +endif
> > > >  
> > > >  libr600 = static_library(
> > > >'r600',
> > > >[files_r600, egd_tables_h],
> > > > -  c_args : [c_vis_args],
> > > > +  c_args : [c_vis_args, r600_c_args],
> > > >cpp_args : [cpp_vis_args],
> > > >include_directories : [
> > > >  inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_amd_common,
> > > > -- 
> > > > 2.15.1
> > > > 
> > 
> > r600 needs this to work with clover at all. Since it's so minimal, how 
> > would you
> > feel about just squashing this into 4/4?
> 
> You mean r600 would be broken after 4/4 if 3/4 wasn't applied?
> I'm OK with squashing them if so.
> 
> My issue was just that with this order, the build would be broken
> between 3/4 and 4/4 because `with_gallium_opencl` doesn't exist yet.

Right, the current order is broken. Unless I've misunderstood the code, without
this patch the code will compile, but trying to run OpenCL workloads with r600
will fail. Maybe it's fine to put that in a follow up patch?


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Add glSpecializeShaderARB to common_desktop_functions

2017-12-12 Thread Dylan Baker
CC: Nicolai Hähnle 
CC: Reviewed-by: Ian Romanick 
CC: Mark Janes 
Fixes: 46b21b8f906 ("mesa: add GL_ARB_gl_spirv boilerplate")
Signed-off-by: Dylan Baker 
---
 src/mesa/main/tests/dispatch_sanity.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index b2ff35717b7..00754deb461 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -1020,6 +1020,9 @@ const struct function common_desktop_functions_possible[] 
= {
{ "glImportMemoryFdEXT", 45, -1 },
{ "glImportSemaphoreFdEXT", 45, -1 },
 
+   /* GL_ARB_gl_spirv */
+   { "glSpecializeShaderARB", 45, -1 },
+
{ NULL, 0, -1 }
 };
 
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/fs/bank_conflicts: Don't touch Gen7 MRF hack registers.

2017-12-12 Thread Francisco Jerez
Fixes: af2c320190f3c731 "intel/fs: Implement GRF bank conflict mitigation pass."
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104199
Reported-by: Darius Spitznagel 
---
 src/intel/compiler/brw_fs.cpp|  2 +-
 src/intel/compiler/brw_fs.h  |  2 +-
 src/intel/compiler/brw_fs_bank_conflicts.cpp | 22 +-
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 0e3ab381fa6..3717c50e32a 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -945,7 +945,7 @@ fs_inst::flags_written() const
  * instruction -- the FS opcodes often generate MOVs in addition.
  */
 int
-fs_visitor::implied_mrf_writes(fs_inst *inst)
+fs_visitor::implied_mrf_writes(fs_inst *inst) const
 {
if (inst->mlen == 0)
   return 0;
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 9c160068a7e..63373580ee4 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -277,7 +277,7 @@ public:
 
struct brw_reg interp_reg(int location, int channel);
 
-   int implied_mrf_writes(fs_inst *inst);
+   int implied_mrf_writes(fs_inst *inst) const;
 
virtual void dump_instructions();
virtual void dump_instructions(const char *name);
diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp 
b/src/intel/compiler/brw_fs_bank_conflicts.cpp
index 42cdc6ef7dc..0cd880d44f2 100644
--- a/src/intel/compiler/brw_fs_bank_conflicts.cpp
+++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp
@@ -530,12 +530,12 @@ namespace {
   for (unsigned reg = 0; reg < 2; reg++)
  constrained[p.atom_of_reg(reg)] = true;
 
-  /* Assume that anything referenced via fixed GRFs is baked into the
-   * hardware's fixed-function logic and may be unsafe to move around.
-   * Also take into account the source GRF restrictions of EOT
-   * send-message instructions.
-   */
   foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
+ /* Assume that anything referenced via fixed GRFs is baked into the
+  * hardware's fixed-function logic and may be unsafe to move around.
+  * Also take into account the source GRF restrictions of EOT
+  * send-message instructions.
+  */
  if (inst->dst.file == FIXED_GRF)
 constrained[p.atom_of_reg(reg_of(inst->dst))] = true;
 
@@ -544,6 +544,18 @@ namespace {
 (is_grf(inst->src[i]) && inst->eot))
constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true;
  }
+
+ /* The location of the Gen7 MRF hack registers is hard-coded in the
+  * rest of the compiler back-end.  Don't attempt to move them around.
+  */
+ if (v->devinfo->gen >= 7) {
+assert(inst->dst.file != MRF);
+
+for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+   const unsigned reg = GEN7_MRF_HACK_START + inst->base_mrf + i;
+   constrained[p.atom_of_reg(reg)] = true;
+}
+ }
   }
 
   return constrained;
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104229] radeon_icd.i686.json api_version is 1.0.3

2017-12-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104229

Bug ID: 104229
   Summary: radeon_icd.i686.json api_version is 1.0.3
   Product: Mesa
   Version: unspecified
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: mercuri...@yahoo.es
QA Contact: mesa-dev@lists.freedesktop.org

Sorry, not sure if this is a bug but I think it is.

I am using mesa-17.3.0 in gentoo

I have this in vulkan icd

/usr/share/vulkan/icd.d $ cat radeon_icd.i686.json 
{
"file_format_version": "1.0.0",
"ICD": {
"library_path": "/usr/lib32/libvulkan_radeon.so",
"api_version": "1.0.3"
}
}

If you compare it with the Intel counterpart, it is different.

/usr/share/vulkan/icd.d $ cat intel_icd.i686.json 
{
"file_format_version": "1.0.0", 
"ICD": {
"library_path": "/usr/lib32/libvulkan_intel.so", 
"api_version": "1.0.57"
}
}



If you check the code on git
https://github.com/mesa3d/mesa/blob/master/src/amd/vulkan/radeon_icd.json.in

the last commit is in 2016

but the Intel counterpart is generated by a Python script

https://github.com/mesa3d/mesa/blob/master/src/intel/vulkan/anv_icd.py#L42




So I think the ICD is exposing to the application a too old version of a vulkan
driver.

Am I wrong?

If this is not an issue, sorry for the noise.

Thank you for your work.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] amd/common: add ac_build_waitcnt()

2017-12-12 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

for the series.

On Tue, Dec 12, 2017 at 6:10 PM, Samuel Pitoiset
 wrote:
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c|  9 +
>  src/amd/common/ac_llvm_build.h|  2 ++
>  src/amd/common/ac_nir_to_llvm.c   | 14 ++
>  src/gallium/drivers/radeonsi/si_shader.c  | 13 ++---
>  src/gallium/drivers/radeonsi/si_shader_internal.h |  2 --
>  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  4 ++--
>  6 files changed, 17 insertions(+), 27 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index baa921d655..b407678c3b 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1482,6 +1482,15 @@ LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, 
> LLVMValueRef input,
>   AC_FUNC_ATTR_LEGACY);
>  }
>
> +void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
> +{
> +   LLVMValueRef args[1] = {
> +   LLVMConstInt(ctx->i32, simm16, false),
> +   };
> +   ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
> +  ctx->voidt, args, 1, 0);
> +}
> +
>  void ac_get_image_intr_name(const char *base_name,
> LLVMTypeRef data_type,
> LLVMTypeRef coords_type,
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 655dc1dcc8..4a570c41bc 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -286,6 +286,8 @@ LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, 
> LLVMValueRef input,
>   LLVMValueRef offset, LLVMValueRef width,
>   bool is_signed);
>
> +void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
> +
>  void ac_get_image_intr_name(const char *base_name,
> LLVMTypeRef data_type,
> LLVMTypeRef coords_type,
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 189421cc4a..255e074e0a 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3656,16 +3656,6 @@ static LLVMValueRef visit_image_size(struct 
> ac_nir_context *ctx,
>  #define LGKM_CNT 0x07f
>  #define VM_CNT 0xf70
>
> -static void emit_waitcnt(struct nir_to_llvm_context *ctx,
> -unsigned simm16)
> -{
> -   LLVMValueRef args[1] = {
> -   LLVMConstInt(ctx->ac.i32, simm16, false),
> -   };
> -   ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
> -  ctx->ac.voidt, args, 1, 0);
> -}
> -
>  static void emit_membar(struct nir_to_llvm_context *ctx,
> const nir_intrinsic_instr *instr)
>  {
> @@ -3688,7 +3678,7 @@ static void emit_membar(struct nir_to_llvm_context *ctx,
> break;
> }
> if (waitcnt != NOOP_WAITCNT)
> -   emit_waitcnt(ctx, waitcnt);
> +   ac_build_waitcnt(&ctx->ac, waitcnt);
>  }
>
>  static void emit_barrier(struct nir_to_llvm_context *ctx)
> @@ -3699,7 +3689,7 @@ static void emit_barrier(struct nir_to_llvm_context 
> *ctx)
>  */
> if (ctx->options->chip_class == SI &&
> ctx->stage == MESA_SHADER_TESS_CTRL) {
> -   emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
> +   ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT);
> return;
> }
> ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 5da9ec0bf5..de6aa44aa3 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -3679,15 +3679,6 @@ static void si_llvm_return_fs_outputs(struct 
> ac_shader_abi *abi,
> ctx->return_value = ret;
>  }
>
> -void si_emit_waitcnt(struct si_shader_context *ctx, unsigned simm16)
> -{
> -   LLVMValueRef args[1] = {
> -   LLVMConstInt(ctx->i32, simm16, 0)
> -   };
> -   lp_build_intrinsic(ctx->ac.builder, "llvm.amdgcn.s.waitcnt",
> -  ctx->voidt, args, 1, 0);
> -}
> -
>  static void membar_emit(
> const struct lp_build_tgsi_action *action,
> struct lp_build_tgsi_context *bld_base,
> @@ -3710,7 +3701,7 @@ static void membar_emit(
> waitcnt &= LGKM_CNT;
>
> if (waitcnt != NOOP_WAITCNT)
> -   si_emit_waitcnt(ctx, waitcnt);
> +   ac_build_waitcnt(&ctx->ac, waitcnt);
>  }
>
>  static void clock_emit(
> @@ -4196,7 +4187,7 @@ static void si_llvm_emit_barrier(const struct 
> lp_build_tgsi_action *action,
>  */
> if (ctx->screen->info.chip_class == SI &&
> ctx->type == PIPE_SHADER_TESS_CTRL) {
> -   si_emit_waitcnt(ct

Re: [Mesa-dev] [PATCH 2/2] i965: compute scratch space size correctly for Gen9

2017-12-12 Thread Kenneth Graunke
On Tuesday, December 12, 2017 4:17:27 AM PST kevin.rogo...@intel.com wrote:
> From: Kevin Rogovin 
> 
> Signed-off-by: Kevin Rogovin 
> ---
>  src/mesa/drivers/dri/i965/brw_program.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
> b/src/mesa/drivers/dri/i965/brw_program.c
> index 6aa4100..1ae0aa0 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -368,9 +368,13 @@ brw_alloc_stage_scratch(struct brw_context *brw,
> *
> * According to the other driver team, this applies to compute shaders
> * as well.  This is not currently documented at all.
> +   *
> +   * brw->screen->subslice_total is the TOTAL number of subslices
> +   * and we wish to view that there are 4 subslices per slice
> +   * instead of the actual number of subslices per slice.
> */
>if (devinfo->gen >= 9)
> - subslices = 4;
> + subslices = 4 * brw->screen->devinfo.num_slices;
>  
>/* WaCSScratchSize:hsw
> *
> 

Thank you!  I'd meant to clean up the nonsense in patch 1 a while ago,
but I guess I got distracted.  Good catch on the bug, too...

First is R-b, and this one gets:

Fixes: 8ecdbb61360 "i965: Pretend there are 4 subslices for compute shader 
threads on Gen9+."
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104005
Reviewed-by: Kenneth Graunke 

Both are now pushed:

To ssh://git.freedesktop.org/git/mesa/mesa
   7469966ed2a..b1ce812c514  master -> master



signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] meson: fix glx-test race

2017-12-12 Thread Dylan Baker
This test should rely on dispatch.h being generated, but it doesn't.

Signed-off-by: Dylan Baker 
---
 src/glx/tests/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glx/tests/meson.build b/src/glx/tests/meson.build
index d81b76906da..51b2843fc90 100644
--- a/src/glx/tests/meson.build
+++ b/src/glx/tests/meson.build
@@ -35,7 +35,7 @@ if with_shared_glapi
 
   glx_test = executable(
 'glx-test',
-[files_glx_test, glx_indirect_size_h],
+[files_glx_test, glx_indirect_size_h, main_dispatch_h],
 link_with : [libglx, libglapi],
 include_directories : [
   include_directories('..', '../../../include/GL/internal'),
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gallium/util: add u_transfer_helper

2017-12-12 Thread Eric Anholt
Rob Clark  writes:

> Add a new helper that drivers can use to emulate various things that
> need special handling in particular in transfer_map:
>
>  1) z32_s8x24.. gl/gallium treats this as a single buffer with depth
> and stencil interleaved but hardware frequently treats this as
> separate z32 and s8 buffers.  Special pack/unpack handling is
> needed in transfer_map/unmap to pack/unpack the exposed buffer
>
>  2) fake RGTC.. GPUs designed with GLES in mind, but which can other-
> wise do GL3, if native RGTC is not supported it can be emulated
> by converting to uncompressed internally, but needs pack/unpack
> in transfer_map/unmap
>
>  3) MSAA resolves in the transfer_map() case
>
> v2: add MSAA resolve based on Eric's "gallium: Add helpers for MSAA
> resolves in pipe_transfer_map()/unmap()." patch; avoid wrapping
> pipe_resource, to make it possible for drivers to use both this
> and threaded_context.

Update on changes between versions?

>
> Signed-off-by: Rob Clark 
> ---
>  src/gallium/auxiliary/Makefile.sources |   2 +
>  src/gallium/auxiliary/meson.build  |   2 +
>  src/gallium/auxiliary/util/u_transfer_helper.c | 499 
> +
>  src/gallium/auxiliary/util/u_transfer_helper.h | 135 +++
>  src/gallium/include/pipe/p_screen.h|   8 +-
>  5 files changed, 645 insertions(+), 1 deletion(-)
>  create mode 100644 src/gallium/auxiliary/util/u_transfer_helper.c
>  create mode 100644 src/gallium/auxiliary/util/u_transfer_helper.h
>
> diff --git a/src/gallium/auxiliary/Makefile.sources 
> b/src/gallium/auxiliary/Makefile.sources
> index f40c4723fae..a2dae04698c 100644
> --- a/src/gallium/auxiliary/Makefile.sources
> +++ b/src/gallium/auxiliary/Makefile.sources
> @@ -304,6 +304,8 @@ C_SOURCES := \
>   util/u_tile.h \
>   util/u_transfer.c \
>   util/u_transfer.h \
> + util/u_transfer_helper.c \
> + util/u_transfer_helper.h \
>   util/u_threaded_context.c \
>   util/u_threaded_context.h \
>   util/u_threaded_context_calls.h \
> diff --git a/src/gallium/auxiliary/meson.build 
> b/src/gallium/auxiliary/meson.build
> index 3e623fd099f..8c242ec1a05 100644
> --- a/src/gallium/auxiliary/meson.build
> +++ b/src/gallium/auxiliary/meson.build
> @@ -324,6 +324,8 @@ files_libgallium = files(
>'util/u_tile.h',
>'util/u_transfer.c',
>'util/u_transfer.h',
> +  'util/u_transfer_helper.c',
> +  'util/u_transfer_helper.h',
>'util/u_threaded_context.c',
>'util/u_threaded_context.h',
>'util/u_threaded_context_calls.h',
> diff --git a/src/gallium/auxiliary/util/u_transfer_helper.c 
> b/src/gallium/auxiliary/util/u_transfer_helper.c
> new file mode 100644
> index 000..c987a35b36c
> --- /dev/null
> +++ b/src/gallium/auxiliary/util/u_transfer_helper.c
> @@ -0,0 +1,499 @@
> +/*
> + * Copyright © 2017 Red Hat
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
> THE
> + * SOFTWARE.
> + */
> +
> +#include "pipe/p_screen.h"
> +
> +#include "util/u_box.h"
> +#include "util/u_format.h"
> +#include "util/u_format_rgtc.h"
> +#include "util/u_format_zs.h"
> +#include "util/u_inlines.h"
> +#include "util/u_transfer_helper.h"
> +
> +
> +struct u_transfer_helper {
> +   const struct u_transfer_vtbl *vtbl;
> +   bool separate_z32s8;
> +   bool fake_rgtc;
> +   bool msaa_map;
> +};
> +
> +static inline bool handle_transfer(struct pipe_resource *prsc)
> +{
> +   struct u_transfer_helper *helper = prsc->screen->transfer_helper;
> +
> +   if (helper->vtbl->get_internal_format) {
> +  enum pipe_format internal_format =
> +helper->vtbl->get_internal_format(prsc);
> +  if (internal_format != prsc->format)
> + return true;
> +   }
> +
> +   if (helper->msaa_map && (prsc->nr_samples > 1))
> +  return true;
> +
> +   return false;
> +}
> +
> +/* The pipe_transfer ptr could either be the driver

Re: [Mesa-dev] [PATCH] mesa: remove second include of errors.h in src/mesa/main/glspirv.c

2017-12-12 Thread Kai Wasserbäch
Hey Ian,
Ian Romanick wrote on 12.12.2017 17:56:
> Weird that this was not noticed before... *shrug*
> 
> Reviewed-by: Ian Romanick 

thank you very much for the R-b! Can you push this for me? I don't have commit
access.

Cheers,
Kai


> On 12/12/2017 07:20 AM, Kai Wasserbäch wrote:
>> Cc: Nicolai Hähnle 
>> Fixes: 5bc03d2508 ("mesa: implement SPIR-V loading in glShaderBinary")
>> Signed-off-by: Kai Wasserbäch 
>> ---
>>  src/mesa/main/glspirv.c | 4 
>>  1 file changed, 4 deletions(-)
>>
>> diff --git a/src/mesa/main/glspirv.c b/src/mesa/main/glspirv.c
>> index 7eb8f906c2..81303057d0 100644
>> --- a/src/mesa/main/glspirv.c
>> +++ b/src/mesa/main/glspirv.c
>> @@ -22,11 +22,7 @@
>>   */
>>  
>>  #include "glspirv.h"
>> -
>> -#include "errors.h"
>> -
>>  #include "errors.h"
>> -
>>  #include "util/u_atomic.h"
>>  
>>  void



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallium/util: add u_transfer_helper

2017-12-12 Thread Rob Clark
Add a new helper that drivers can use to emulate various things that
need special handling in particular in transfer_map:

 1) z32_s8x24.. gl/gallium treats this as a single buffer with depth
and stencil interleaved but hardware frequently treats this as
separate z32 and s8 buffers.  Special pack/unpack handling is
needed in transfer_map/unmap to pack/unpack the exposed buffer

 2) fake RGTC.. some GPUs were designed with GLES in mind but can
otherwise do GL3.  If native RGTC is not supported, it can be emulated
by converting to an uncompressed format internally, but this needs
pack/unpack in transfer_map/unmap

 3) MSAA resolves in the transfer_map() case

v2: add MSAA resolve based on Eric's "gallium: Add helpers for MSAA
resolves in pipe_transfer_map()/unmap()." patch; avoid wrapping
pipe_resource, to make it possible for drivers to use both this
and threaded_context.

Signed-off-by: Rob Clark 
---
 src/gallium/auxiliary/Makefile.sources |   2 +
 src/gallium/auxiliary/meson.build  |   2 +
 src/gallium/auxiliary/util/u_transfer_helper.c | 499 +
 src/gallium/auxiliary/util/u_transfer_helper.h | 135 +++
 src/gallium/include/pipe/p_screen.h|   8 +-
 5 files changed, 645 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/auxiliary/util/u_transfer_helper.c
 create mode 100644 src/gallium/auxiliary/util/u_transfer_helper.h

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index f40c4723fae..a2dae04698c 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -304,6 +304,8 @@ C_SOURCES := \
util/u_tile.h \
util/u_transfer.c \
util/u_transfer.h \
+   util/u_transfer_helper.c \
+   util/u_transfer_helper.h \
util/u_threaded_context.c \
util/u_threaded_context.h \
util/u_threaded_context_calls.h \
diff --git a/src/gallium/auxiliary/meson.build 
b/src/gallium/auxiliary/meson.build
index 3e623fd099f..8c242ec1a05 100644
--- a/src/gallium/auxiliary/meson.build
+++ b/src/gallium/auxiliary/meson.build
@@ -324,6 +324,8 @@ files_libgallium = files(
   'util/u_tile.h',
   'util/u_transfer.c',
   'util/u_transfer.h',
+  'util/u_transfer_helper.c',
+  'util/u_transfer_helper.h',
   'util/u_threaded_context.c',
   'util/u_threaded_context.h',
   'util/u_threaded_context_calls.h',
diff --git a/src/gallium/auxiliary/util/u_transfer_helper.c 
b/src/gallium/auxiliary/util/u_transfer_helper.c
new file mode 100644
index 000..c987a35b36c
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_transfer_helper.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright © 2017 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_screen.h"
+
+#include "util/u_box.h"
+#include "util/u_format.h"
+#include "util/u_format_rgtc.h"
+#include "util/u_format_zs.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer_helper.h"
+
+
+struct u_transfer_helper {
+   const struct u_transfer_vtbl *vtbl;
+   bool separate_z32s8;
+   bool fake_rgtc;
+   bool msaa_map;
+};
+
+static inline bool handle_transfer(struct pipe_resource *prsc)
+{
+   struct u_transfer_helper *helper = prsc->screen->transfer_helper;
+
+   if (helper->vtbl->get_internal_format) {
+  enum pipe_format internal_format =
+helper->vtbl->get_internal_format(prsc);
+  if (internal_format != prsc->format)
+ return true;
+   }
+
+   if (helper->msaa_map && (prsc->nr_samples > 1))
+  return true;
+
+   return false;
+}
+
+/* The pipe_transfer ptr could either be the driver's, or u_transfer,
+ * depending on whether we are intervening or not.  Check handle_transfer()
+ * before dereferencing.
+ */
+struct u_transfer {
+   struct pipe_transfer base;
+   /* Note that in case of MSAA resolve for transfer plus z32s8 or fake rgtc
+* we end up with stacked u_tr

[Mesa-dev] [PATCH 4/4] amd/common: add ac_build_waitcnt()

2017-12-12 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c|  9 +
 src/amd/common/ac_llvm_build.h|  2 ++
 src/amd/common/ac_nir_to_llvm.c   | 14 ++
 src/gallium/drivers/radeonsi/si_shader.c  | 13 ++---
 src/gallium/drivers/radeonsi/si_shader_internal.h |  2 --
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  4 ++--
 6 files changed, 17 insertions(+), 27 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index baa921d655..b407678c3b 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1482,6 +1482,15 @@ LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, 
LLVMValueRef input,
  AC_FUNC_ATTR_LEGACY);
 }
 
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
+{
+   LLVMValueRef args[1] = {
+   LLVMConstInt(ctx->i32, simm16, false),
+   };
+   ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
+  ctx->voidt, args, 1, 0);
+}
+
 void ac_get_image_intr_name(const char *base_name,
LLVMTypeRef data_type,
LLVMTypeRef coords_type,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 655dc1dcc8..4a570c41bc 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -286,6 +286,8 @@ LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, 
LLVMValueRef input,
  LLVMValueRef offset, LLVMValueRef width,
  bool is_signed);
 
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
+
 void ac_get_image_intr_name(const char *base_name,
LLVMTypeRef data_type,
LLVMTypeRef coords_type,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 189421cc4a..255e074e0a 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3656,16 +3656,6 @@ static LLVMValueRef visit_image_size(struct 
ac_nir_context *ctx,
 #define LGKM_CNT 0x07f
 #define VM_CNT 0xf70
 
-static void emit_waitcnt(struct nir_to_llvm_context *ctx,
-unsigned simm16)
-{
-   LLVMValueRef args[1] = {
-   LLVMConstInt(ctx->ac.i32, simm16, false),
-   };
-   ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
-  ctx->ac.voidt, args, 1, 0);
-}
-
 static void emit_membar(struct nir_to_llvm_context *ctx,
const nir_intrinsic_instr *instr)
 {
@@ -3688,7 +3678,7 @@ static void emit_membar(struct nir_to_llvm_context *ctx,
break;
}
if (waitcnt != NOOP_WAITCNT)
-   emit_waitcnt(ctx, waitcnt);
+   ac_build_waitcnt(&ctx->ac, waitcnt);
 }
 
 static void emit_barrier(struct nir_to_llvm_context *ctx)
@@ -3699,7 +3689,7 @@ static void emit_barrier(struct nir_to_llvm_context *ctx)
 */
if (ctx->options->chip_class == SI &&
ctx->stage == MESA_SHADER_TESS_CTRL) {
-   emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
+   ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT);
return;
}
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 5da9ec0bf5..de6aa44aa3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3679,15 +3679,6 @@ static void si_llvm_return_fs_outputs(struct 
ac_shader_abi *abi,
ctx->return_value = ret;
 }
 
-void si_emit_waitcnt(struct si_shader_context *ctx, unsigned simm16)
-{
-   LLVMValueRef args[1] = {
-   LLVMConstInt(ctx->i32, simm16, 0)
-   };
-   lp_build_intrinsic(ctx->ac.builder, "llvm.amdgcn.s.waitcnt",
-  ctx->voidt, args, 1, 0);
-}
-
 static void membar_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
@@ -3710,7 +3701,7 @@ static void membar_emit(
waitcnt &= LGKM_CNT;
 
if (waitcnt != NOOP_WAITCNT)
-   si_emit_waitcnt(ctx, waitcnt);
+   ac_build_waitcnt(&ctx->ac, waitcnt);
 }
 
 static void clock_emit(
@@ -4196,7 +4187,7 @@ static void si_llvm_emit_barrier(const struct 
lp_build_tgsi_action *action,
 */
if (ctx->screen->info.chip_class == SI &&
ctx->type == PIPE_SHADER_TESS_CTRL) {
-   si_emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
+   ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT);
return;
}
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index f50a022db8..e05927c7fd 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src

[Mesa-dev] [PATCH 1/4] radeonsi: make use of ac_build_fdiv()

2017-12-12 Thread Samuel Pitoiset
And move the comment to amd/common.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c| 1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 8 +---
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index b2bf1bf7b5..baa921d655 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -439,6 +439,7 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
 {
LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
 
+   /* Use v_rcp_f32 instead of precise division. */
if (!LLVMIsConstant(ret))
LLVMSetMetadata(ret, ctx->fpmath_md_kind, 
ctx->fpmath_md_2p5_ulp);
return ret;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index a27586b3ff..cec33c38b4 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -668,13 +668,7 @@ static void emit_fdiv(const struct lp_build_tgsi_action 
*action,
struct si_shader_context *ctx = si_shader_context(bld_base);
 
emit_data->output[emit_data->chan] =
-   LLVMBuildFDiv(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-
-   /* Use v_rcp_f32 instead of precise division. */
-   if (!LLVMIsConstant(emit_data->output[emit_data->chan]))
-   LLVMSetMetadata(emit_data->output[emit_data->chan],
-   ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
+   ac_build_fdiv(&ctx->ac, emit_data->args[0], emit_data->args[1]);
 }
 
 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] amd/common: more use of i32_1

2017-12-12 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 799ba83e89..189421cc4a 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1989,7 +1989,7 @@ get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef 
descriptor, bool in_ele
 */
LLVMValueRef stride =
LLVMBuildExtractElement(ctx->ac.builder, descriptor,
-   LLVMConstInt(ctx->ac.i32, 1, 
false), "");
+   ctx->ac.i32_1, "");
stride = LLVMBuildLShr(ctx->ac.builder, stride,
   LLVMConstInt(ctx->ac.i32, 16, false), 
"");
stride = LLVMBuildAnd(ctx->ac.builder, stride,
@@ -6056,20 +6056,20 @@ write_tess_factors(struct nir_to_llvm_context *ctx)
if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer);
lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
-LLVMConstInt(ctx->ac.i32, 1, false), 
"");
+ctx->ac.i32_1, "");
outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer);
} else {
for (i = 0; i < outer_comps; i++) {
outer[i] = out[i] =
ac_lds_load(&ctx->ac, lds_outer);
lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
-LLVMConstInt(ctx->ac.i32, 1, 
false), "");
+ctx->ac.i32_1, "");
}
for (i = 0; i < inner_comps; i++) {
inner[i] = out[outer_comps+i] =
ac_lds_load(&ctx->ac, lds_inner);
lds_inner = LLVMBuildAdd(ctx->builder, lds_inner,
-LLVMConstInt(ctx->ac.i32, 1, 
false), "");
+ctx->ac.i32_1, "");
}
}
 
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] amd/common: more use of i32_0

2017-12-12 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 663b27d265..799ba83e89 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1136,7 +1136,7 @@ static LLVMValueRef emit_int_cmp(struct ac_llvm_context 
*ctx,
LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
return LLVMBuildSelect(ctx->builder, result,
   LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
-  LLVMConstInt(ctx->i32, 0, false), "");
+  ctx->i32_0, "");
 }
 
 static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
@@ -1149,7 +1149,7 @@ static LLVMValueRef emit_float_cmp(struct ac_llvm_context 
*ctx,
result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
return LLVMBuildSelect(ctx->builder, result,
   LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
-  LLVMConstInt(ctx->i32, 0, false), "");
+  ctx->i32_0, "");
 }
 
 static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
@@ -2140,7 +2140,7 @@ static LLVMValueRef build_tex_intrinsic(struct 
ac_nir_context *ctx,
return ac_build_buffer_load_format(&ctx->ac,
   args->resource,
   args->addr,
-  LLVMConstInt(ctx->ac.i32, 0, 
false),
+  ctx->ac.i32_0,
   true);
}
 
@@ -2265,7 +2265,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 
params[1] = ctx->abi->load_ssbo(ctx->abi,
get_src(ctx, instr->src[1]), true);
-   params[2] = LLVMConstInt(ctx->ac.i32, 0, false); /* vindex */
+   params[2] = ctx->ac.i32_0; /* vindex */
params[4] = ctx->ac.i1false;  /* glc */
params[5] = ctx->ac.i1false;  /* slc */
 
@@ -2349,7 +2349,7 @@ static LLVMValueRef visit_atomic_ssbo(struct 
ac_nir_context *ctx,
params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
 get_src(ctx, instr->src[0]),
 true);
-   params[arg_count++] = LLVMConstInt(ctx->ac.i32, 0, false); /* vindex */
+   params[arg_count++] = ctx->ac.i32_0; /* vindex */
params[arg_count++] = get_src(ctx, instr->src[1]);  /* voffset */
params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false);  /* slc */
 
@@ -2425,7 +2425,7 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
ctx->abi->load_ssbo(ctx->abi,
get_src(ctx, instr->src[0]),
false),
-   LLVMConstInt(ctx->ac.i32, 0, false),
+   ctx->ac.i32_0,
offset,
ctx->ac.i1false,
ctx->ac.i1false,
@@ -4937,7 +4937,7 @@ static void visit_if(struct ac_nir_context *ctx, nir_if 
*if_stmt)
ctx->ac.context, fn, "");
 
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value,
- LLVMConstInt(ctx->ac.i32, 0, false), 
"");
+ ctx->ac.i32_0, "");
LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block);
 
LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
@@ -5033,7 +5033,7 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
 
input = ac_build_buffer_load_format(&ctx->ac, t_list,
buffer_index,
-   LLVMConstInt(ctx->ac.i32, 
0, false),
+   ctx->ac.i32_0,
true);
 
for (unsigned chan = 0; chan < 4; chan++) {
@@ -6422,7 +6422,7 @@ static void ac_nir_fixup_ls_hs_input_vgprs(struct 
nir_to_llvm_context *ctx)
  LLVMConstInt(ctx->ac.i32, 8, false),
  LLVMConstInt(ctx->ac.i32, 8, false), 
false);
LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
- LLVMConstInt(ctx->ac.i32, 0, 
false), "");
+ ctx->ac.i32_0, "");
ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, 
ctx->rel_auto_id, ctx->abi.instance_id, "");
ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, 
ctx->abi.vertex_id, ctx->vs_prim_id, "")

[Mesa-dev] [PATCH] radv: export SampleMask from pixel shaders at full rate

2017-12-12 Thread Samuel Pitoiset
Use 16_ABGR instead of 32_ABGR if Z isn't written.

Ported from RadeonSI.

No CTS regressions on Polaris.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 65 ++---
 src/amd/vulkan/radv_pipeline.c  | 29 ++
 2 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 663b27d265..5916619e97 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -6166,6 +6166,26 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
return true;
 }
 
+static unsigned
+si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+  bool writes_samplemask)
+{
+   if (writes_z) {
+   /* Z needs 32 bits. */
+   if (writes_samplemask)
+   return V_028710_SPI_SHADER_32_ABGR;
+   else if (writes_stencil)
+   return V_028710_SPI_SHADER_32_GR;
+   else
+   return V_028710_SPI_SHADER_32_R;
+   } else if (writes_stencil || writes_samplemask) {
+   /* Both stencil and sample mask need only 16 bits. */
+   return V_028710_SPI_SHADER_UINT16_ABGR;
+   } else {
+   return V_028710_SPI_SHADER_ZERO;
+   }
+}
+
 static void
 si_export_mrt_z(struct nir_to_llvm_context *ctx,
LLVMValueRef depth, LLVMValueRef stencil,
@@ -6184,19 +6204,42 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */
args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */
 
-   if (depth) {
-   args.out[0] = depth;
-   args.enabled_channels |= 0x1;
-   }
+   unsigned format = si_get_spi_shader_z_format(depth != NULL,
+stencil != NULL,
+samplemask != NULL);
+
+   if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
+   assert(!depth);
+   args.compr = 1; /* COMPR flag */
+
+   if (stencil) {
+   /* Stencil should be in X[23:16]. */
+   stencil = ac_to_integer(&ctx->ac, stencil);
+   stencil = LLVMBuildShl(ctx->builder, stencil,
+  LLVMConstInt(ctx->ac.i32, 16, 
0), "");
+   args.out[0] = ac_to_float(&ctx->ac, stencil);
+   args.enabled_channels |= 0x3;
+   }
+   if (samplemask) {
+   /* SampleMask should be in Y[15:0]. */
+   args.out[1] = samplemask;
+   args.enabled_channels |= 0xc;
+   }
+   } else {
+   if (depth) {
+   args.out[0] = depth;
+   args.enabled_channels |= 0x1;
+   }
 
-   if (stencil) {
-   args.out[1] = stencil;
-   args.enabled_channels |= 0x2;
-   }
+   if (stencil) {
+   args.out[1] = stencil;
+   args.enabled_channels |= 0x2;
+   }
 
-   if (samplemask) {
-   args.out[2] = samplemask;
-   args.enabled_channels |= 0x4;
+   if (samplemask) {
+   args.out[2] = samplemask;
+   args.enabled_channels |= 0x4;
+   }
}
 
/* SI (except OLAND and HAINAN) has a bug that it only looks
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 0146d6935e..baaf5c4c77 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2013,6 +2013,25 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline 
*pipeline,
}
 }
 
+static unsigned
+si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+  bool writes_samplemask)
+{
+   if (writes_z) {
+   /* Z needs 32 bits. */
+   if (writes_samplemask)
+   return V_028710_SPI_SHADER_32_ABGR;
+   else if (writes_stencil)
+   return V_028710_SPI_SHADER_32_GR;
+   else
+   return V_028710_SPI_SHADER_32_R;
+   } else if (writes_stencil || writes_samplemask) {
+   /* Both stencil and sample mask need only 16 bits. */
+   return V_028710_SPI_SHADER_UINT16_ABGR;
+   } else {
+   return V_028710_SPI_SHADER_ZERO;
+   }
+}
 
 static VkResult
 radv_pipeline_init(struct radv_pipeline *pipeline,
@@ -2108,11 +2127,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
if (pipeline->device->physical_device->has_rbplus)
pipeline->graphics.db_shader_control |= 
S_02880C_DUAL_QUAD_DISABLE(1);
 
-   pipeline->graphics.shader_z_for

Re: [Mesa-dev] [PATCH 4/4] mesa: enable GL_EXT_disjoint_timer_query extension

2017-12-12 Thread Ian Romanick
On 11/19/2017 11:18 PM, Tapani Pälli wrote:
> Add GL_GPU_DISJOINT_EXT and enable extension when ARB_timer_query
> is supported by the driver.
> 
> Following dEQP cases pass:
>dEQP-EGL.functional.get_proc_address.extension.gl_ext_disjoint_timer_query
>dEQP-EGL.functional.client_extensions.disjoint
> 
> Piglit test 'ext_disjoint_timer_query-simple' passes with these changes.
> 
> No changes/regression observed in Intel CI.
> 
> Signed-off-by: Tapani Pälli 
> ---
>  src/mesa/main/extensions_table.h |  1 +
>  src/mesa/main/get.c  | 17 +
>  src/mesa/main/get_hash_params.py |  5 +
>  src/mesa/main/glheader.h |  4 
>  src/mesa/main/mtypes.h   |  1 +
>  src/mesa/main/queryobj.c |  3 ++-
>  6 files changed, 30 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index 5b66e7d30d..78f0d35feb 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -209,6 +209,7 @@ EXT(EXT_copy_image  , 
> OES_copy_image
>  EXT(EXT_copy_texture, dummy_true 
> , GLL,  x ,  x ,  x , 1995)
>  EXT(EXT_depth_bounds_test   , EXT_depth_bounds_test  
> , GLL, GLC,  x ,  x , 2002)
>  EXT(EXT_discard_framebuffer , dummy_true 
> ,  x ,  x , ES1, ES2, 2009)
> +EXT(EXT_disjoint_timer_query, ARB_timer_query
> ,  x ,  x ,  x , ES2, 2016)
 ^^^

This should be EXT_disjoint_timer_query, right?  All of the things below
seem to use that flag.

>  EXT(EXT_draw_buffers, dummy_true 
> ,  x ,  x ,  x , ES2, 2012)
>  EXT(EXT_draw_buffers2   , EXT_draw_buffers2  
> , GLL, GLC,  x ,  x , 2006)
>  EXT(EXT_draw_buffers_indexed, ARB_draw_buffers_blend 
> ,  x ,  x ,  x ,  30, 2014)
> diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
> index ea8d932b18..6c0d1f7e9a 100644
> --- a/src/mesa/main/get.c
> +++ b/src/mesa/main/get.c
> @@ -578,6 +578,13 @@ static const int extra_EXT_provoking_vertex_32[] = {
> EXTRA_END
>  };
>  
> +static const int extra_EXT_disjoint_timer_query[] = {
> +   EXTRA_API_ES2,
> +   EXTRA_API_ES3,
> +   EXT(EXT_disjoint_timer_query),
> +   EXTRA_END
> +};
> +
>  
>  /* This is the big table describing all the enums we accept in
>   * glGet*v().  The table is partitioned into six parts: enums
> @@ -1151,6 +1158,16 @@ find_custom_value(struct gl_context *ctx, const struct 
> value_desc *d, union valu
>   }
>}
>break;
> +   /* GL_EXT_disjoint_timer_query */
> +   case GL_GPU_DISJOINT_EXT:
> +  {
> + simple_mtx_lock(&ctx->Shared->Mutex);
> + v->value_int = ctx->Shared->DisjointOperation;
> + /* Reset state as expected by the spec. */
> + ctx->Shared->DisjointOperation = false;
> + simple_mtx_unlock(&ctx->Shared->Mutex);
> +  }
> +  break;
> }
>  }
>  
> diff --git a/src/mesa/main/get_hash_params.py 
> b/src/mesa/main/get_hash_params.py
> index 20ef6e4977..55a956da84 100644
> --- a/src/mesa/main/get_hash_params.py
> +++ b/src/mesa/main/get_hash_params.py
> @@ -254,6 +254,11 @@ descriptor=[
>[ "POINT_SIZE_ARRAY_BUFFER_BINDING_OES", "LOC_CUSTOM, TYPE_INT, 0, 
> NO_EXTRA" ],
>  ]},
>  
> +# Enums in GLES2, GLES3
> +{ "apis": ["GLES2", "GLES3"], "params": [
> +  [ "GPU_DISJOINT_EXT", "LOC_CUSTOM, TYPE_INT, 0, 
> extra_EXT_disjoint_timer_query" ],
> +]},
> +
>  { "apis": ["GL", "GL_CORE", "GLES2"], "params": [
>  # == GL_MAX_TEXTURE_COORDS_NV
>[ "MAX_TEXTURE_COORDS_ARB", "CONTEXT_INT(Const.MaxTextureCoordUnits), 
> extra_ARB_fragment_program" ],
> diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h
> index 3f2a923782..35a442a77b 100644
> --- a/src/mesa/main/glheader.h
> +++ b/src/mesa/main/glheader.h
> @@ -144,6 +144,10 @@ typedef void *GLeglImageOES;
>  #define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52
>  #endif
>  
> +#ifndef GL_EXT_disjoint_timer_query
> +#define GL_GPU_DISJOINT_EXT 0x8FBB
> +#endif
> +
>  /* Inexplicably, GL_HALF_FLOAT_OES has a different value than GL_HALF_FLOAT.
>   */
>  #ifndef GL_HALF_FLOAT_OES
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 4cb6a74d69..9ed8d86688 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -4143,6 +4143,7 @@ struct gl_extensions
> GLboolean EXT_blend_func_separate;
> GLboolean EXT_blend_minmax;
> GLboolean EXT_depth_bounds_test;
> +   GLboolean EXT_disjoint_timer_query;
> GLboolean EXT_draw_buffers2;
> GLboolean EXT_framebuffer_multisample;
> GLboolean EXT_framebuffer_multisample_blit_scaled;
> diff --git a/src/mesa/main/queryobj.c 

Re: [Mesa-dev] [PATCH] mesa: remove second include of errors.h in src/mesa/main/glspirv.c

2017-12-12 Thread Ian Romanick
Weird that this was not noticed before... *shrug*

Reviewed-by: Ian Romanick 

On 12/12/2017 07:20 AM, Kai Wasserbäch wrote:
> Cc: Nicolai Hähnle 
> Fixes: 5bc03d2508 ("mesa: implement SPIR-V loading in glShaderBinary")
> Signed-off-by: Kai Wasserbäch 
> ---
>  src/mesa/main/glspirv.c | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/src/mesa/main/glspirv.c b/src/mesa/main/glspirv.c
> index 7eb8f906c2..81303057d0 100644
> --- a/src/mesa/main/glspirv.c
> +++ b/src/mesa/main/glspirv.c
> @@ -22,11 +22,7 @@
>   */
>  
>  #include "glspirv.h"
> -
> -#include "errors.h"
> -
>  #include "errors.h"
> -
>  #include "util/u_atomic.h"
>  
>  void
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Gallium ARB_get_program_binary support

2017-12-12 Thread Mike Lothian
Game works fine, not sure if it's using the extension or not

On Mon, 11 Dec 2017 at 23:21 Mike Lothian  wrote:

> I'll check that when I'm next home
>
> On Mon, 11 Dec 2017 at 21:19 Timothy Arceri  wrote:
>
>> On 12/12/17 08:13, Mike Lothian wrote:
>> > Tested with Dead Island, Dirt Rally and Serious Sam BFG (which I believe
>> > uses this extension)
>> >
>> > Do you know any other games that use this?
>>
>> I believe Dying Light uses it also.
>>
>>
>> >
>> > On Mon, 11 Dec 2017 at 02:17 Dieter Nützel > > > wrote:
>> >
>> > For the V2 series:
>> >
>> > Tested-by: Dieter Nützel > > >
>> > and
>> > Acked-by: Dieter Nützel > > >
>> > if this is worth from my side.
>> >
>> > on RX580 8GB
>> >
>> > with DiRT Rally, UH, UV, Blender 2.79, smoketest, F1 2017 (Vulkan),
>> > glmark2 (parallel with
>> > OpenCL (/opt/opencl-example>./run_tests.sh))
>> >
>> > Dieter
>> >
>> > Am 10.12.2017 23:06, schrieb Timothy Arceri:
>> >  > On 10/12/17 17:56, Dieter Nützel wrote:
>> >  >> First BAD commit is patch [PATCH 2/5].
>> >  >> https://patchwork.freedesktop.org/patch/192329/
>> >  >
>> >  >
>> >  > Thanks! I've sent a V2 of that patch, I've also confirmed DiRT
>> Rally
>> >  > no longer segfaults at start-up.
>> >  >
>> >  >> With it I get this in dmesg:
>> >  >>
>> >  >> [34581.836122] OGL_Dispatch_33[4887]: segfault at 0 ip
>> >  >> 9ebeafda sp a95580d4 error 4 in
>> >  >> libc-2.26.so [7f6afcaf7000+1b1000]
>> >  >> [34706.224082] perf: interrupt took too long (5089 > 4913),
>> lowering
>> >  >> kernel.perf_event_max_sample_rate to 39250
>> >  >> [34918.80] OGL_Dispatch_33[19897]: segfault at 0 ip
>> >  >> d9adbd7a sp e6e1b769 error 4 in
>> >  >> libc-2.26.so [7f2fc4777000+1b1000]
>> >  >> [35148.576818] OGL_Dispatch_33[2934]: segfault at 0 ip
>> >  >> 78a41511 sp 1f147296 error 4 in
>> >  >> libc-2.26.so [7ff11fa23000+1b1000]
>> >  >> [35172.410621] OGL_Dispatch_33[3371]: segfault at 0 ip
>> >  >> ef37ae37 sp 99ce37ab error 4 in
>> >  >> libc-2.26.so [7f0af78ff000+1b1000]
>> >  >> [35383.940291] OGL_Dispatch_33[18591]: segfault at 0 ip
>> >  >> 813f92d2 sp 7c9305e7 error 4 in
>> >  >> libc-2.26.so [7fee87426000+1b1000]
>> >  >>
>> >  >> It's in the morning, now.
>> >  >> Here in 'OLD' Germany, near Hamburg.
>> >  >> Good night! ;-)
>> >  >>
>> >  >> Dieter
>> >  >>
>> >  >> Am 10.12.2017 05:15, schrieb Timothy Arceri:
>> >  >>> Hi, as always thanks for testing :)
>> >  >>>
>> >  >>> Are you able to find out which patch causes the crash? I'm
>> > curious if
>> >  >>> its the ARB_get_program_binary support or if I break the
>> regular
>> >  >>> cache
>> >  >>> when re-factoring.
>> >  >>>
>> >  >>> Thanks,
>> >  >>> Tim
>> >  >>>
>> >  >>> On 09/12/17 15:58, Dieter Nützel wrote:
>> >   Hello Tim,
>> >  
>> >   first time ever, that I got a real regression with your GREAT
>> > work.
>> >   ;-)
>> >  
>> >   DiRT Rally sig fault (SIGSEGV(11) with this series.
>> >   (see attachment)
>> >  
>> >   Bad cache hit re-read?
>> >   Removing .cache/mesa_shader_cache do not help.
>> >   Searching for the rigth Feral (Steam?) cache dir. -
>> Alex/James?
>> >  
>> >   Then I'll have to find which of the five did it.
>> >  
>> >    From the log I got this:
>> >   [1209/054156:ERROR:sandbox_linux.cc(325)] InitializeSandbox()
>> > called
>> >   with multiple threads in process gpu-process
>> >   [1209/054157:INFO:CONSOLE(0)] "The specified value '!' does
>> not
>> >   conform to the required format.  The format is '#rrggbb'
>> where rr,
>> >   gg, bb are two-digit hexadecimal numbers.", source:
>> >  
>> >
>>  
>> file://localhost/home1/alexander/My%20Games/Steam/steamapps/common/DiRT%20Rally/share/FeralUI/PGOW/Core/feralUI.html
>> >   (0)
>> >   Installing breakpad exception handler for
>> >   appid(steam)/version(1509425745)
>> >   [1209/054158:INFO:CONSOLE(0)] "Synchronous XMLHttpRequest on
>> the
>> >   main thread is deprecated because of its detrimental effects
>> > to the
>> >   end user's experience. For more help, check
>> >   http://xhr.spec.whatwg.org/.";, source:  (0)
>> >   [1209/054208:WARNING:x11_util.cc(1490)] X error received:
>> serial
>> >   4814, error_code 3 (BadWindow (invalid Window parameter)),
>> >   request_code 4, minor_code 0 (X_DestroyWindow)
>> >

Re: [Mesa-dev] [Mesa-stable] [PATCH 1/2] i965/bufmgr: Add a helper to mark a BO as external

2017-12-12 Thread Jason Ekstrand
On Tue, Dec 12, 2017 at 5:42 AM, Emil Velikov 
wrote:

> On 11 December 2017 at 22:03, Jason Ekstrand  wrote:
> > On Mon, Dec 11, 2017 at 12:08 PM, Emil Velikov  >
> > wrote:
> >>
> >> On 21 November 2017 at 00:13, Andres Gomez  wrote:
> >> > Jason, this nominated series landed without mentioning any specific
> >> > stable queue.
> >> >
> >> > From what I'm seeing, both depend on 2c4097aff1b which didn't make it
> >> > for 17.2 so I'm dropping them for that queue.
> >> >
> >> This is a preparatory patch for the follow-up commit.
> >> With the latter addressing issue caused by
> >> 4b1e70cc57d7ff5f465544644b2180dee1490cee - only available in
> >> 17.4.0-dev.
> >>
> >> I believe the 17.2 and 17.3 series should be safe ;-)
> >
> >
> > This bug goes all the way back.  It interacts with the patch mentioned
> but
> > is not the same thing.  This patch (and the subsequent one) should
> probably
> > go back as far as brw_bufmgr does.
> Having a closer look at the patches related to external buffers, shows
> the following:
>
> 2c4097aff1b i965: Only put external handles into the handle ht
> 17.3 only
>
> d7a19d69ebc i965: Use PTE MOCS for all external buffers
> 17.4 + 17.3
> 4b1e70cc57d i965: Switch over to fully external-or-not MOCS scheme
> 17.4 only
>
> 344252a27f8 i965/bufmgr: Add a helper to mark a BO as external
> 0a6a137eb27 i965: Mark BOs as external when we export their handle
> These fixes
>
> With the above in mind, it seems that 4b1e70cc57d should have been
> tagged for 17.3?
>

It's not strictly necessary but probably makes back-porting easier.  I'm
fine with pulling it into 17.3.


> On the 17.2 front: Andres is right - commit 2c4097aff1b seems like a
> bare minimum for this series.
> Considering how well into the 17.2 series we are, I'm a bit worried
> about picking it :-\
>

Agreed.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: remove second include of errors.h in src/mesa/main/glspirv.c

2017-12-12 Thread Kai Wasserbäch
Cc: Nicolai Hähnle 
Fixes: 5bc03d2508 ("mesa: implement SPIR-V loading in glShaderBinary")
Signed-off-by: Kai Wasserbäch 
---
 src/mesa/main/glspirv.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/src/mesa/main/glspirv.c b/src/mesa/main/glspirv.c
index 7eb8f906c2..81303057d0 100644
--- a/src/mesa/main/glspirv.c
+++ b/src/mesa/main/glspirv.c
@@ -22,11 +22,7 @@
  */
 
 #include "glspirv.h"
-
-#include "errors.h"
-
 #include "errors.h"
-
 #include "util/u_atomic.h"
 
 void
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa] meson: add dep_thread to every lib that includes threads.h

2017-12-12 Thread Emil Velikov
On 11 December 2017 at 22:22, Dylan Baker  wrote:
> Quoting Emil Velikov (2017-12-11 12:06:35)
>> On 7 December 2017 at 17:25, Dylan Baker  wrote:
>> > Quoting Emil Velikov (2017-12-07 08:40:27)
>> >> On 7 December 2017 at 14:51, Eric Engestrom  
>> >> wrote:
>> >> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104141
>> >> > Signed-off-by: Eric Engestrom 
>> >> > ---
>> >> >  src/broadcom/meson.build| 2 +-
>> >> >  src/gallium/auxiliary/meson.build   | 2 +-
>> >> >  src/gallium/state_trackers/nine/meson.build | 1 +
>> >> >  src/gallium/targets/xa/meson.build  | 2 +-
>> >> >  src/gallium/targets/xvmc/meson.build| 2 +-
>> >> >  src/gbm/meson.build | 2 +-
>> >> >  src/intel/common/meson.build| 2 +-
>> >> >  src/loader/meson.build  | 2 +-
>> >> >  src/util/meson.build| 2 +-
>> >> >  9 files changed, 9 insertions(+), 8 deletions(-)
>> >> >
>> >> I doubt we can continue and pretend to be libpthread.so free.
>> >> To make it even funnier, depending on moon cycle or other fun factors,
>> >> we could get the pthread dependency implicitly satisfied as one of the
>> >> other shared libraries already pulls the library.
>> >>
>> >> So how about we simply append -pthread to CC/CXX with at global scope
>> >> and drop the all the individual dependencies?
>> >> It will save us a few characters to type, plus will ensure that newly
>> >> added binaries don't fall victim of the same issue.
>> >
>> > Absolutely not. The meson build has dep_thread for a reason, because meson
>> > guarantees that calling `dependency('threads')` will always return the 
>> > right
>> > value for your platform, even if that platform is windows and doesn't have
>> > pthreads at all (but does the right thing for cygwin).
>> >
>> I would recommend looking through clang/gcc. AFAICS any* platform/arch
>> combo supported by Mesa handles -pthread and that toggle does the
>> "right thing".
>> Obviously that can seem a bit hacky, so a better way to avoid all the
>> copy/paste is for meson to grow an option that allows folding the
>> required cflags/libs with the compiler directive.
>
> That's all fine, but the meson build is planning on supporting haiku and plain
> windows (with msvc), neither of which have pthreads (haiku does, but it's not 
> a
> standalone library and you don't pass -pthreads to the compiler or linker and
> it's an error to do so). macOS clang also warns when passing -pthreads to the
> linker (but only the one shipped with xcode), not if you build clang yourself.
>
> If you feel strongly about it, open a bug upstream and discuss it with 
> upstream.
> If they agree and add a mechanism to do so I'd be fine using it.
>
>> > The reason that we're running into this problem is as you guessed that some
>> > dependencies pull in pthreads implicitly, for example LLVM, which is why 
>> > we're
>> > seeing this so often in gallium.
>> >
>> Precisely. Due to the combinatoric explosions things are bound to
>> break again, hence my earlier suggestion.
>> I doubt you or anyone on the team will be excited to see things break.
>
> That's possible, obviously. I also think these sort of issues will work
> themselves out fairly quickly, while I'm very concerned adding -pthread into 
> the
> list of arguments we pass unconditionally is going to break whole platforms in
> subtle and hard to fix ways, and really goes against the philosophy of meson,
> which is to solve these sort of problems in meson itself, rather than each 
> build
> system solving them again and again, usually incorrectly.
>
> If we want to trot out the big hammer, I'd be happier just to add dep_thread 
> to
> every shared library and binary than trying to add the right combination of
> -pthreads and -lpthreads for each platform ourselves to the C and C++ flags.
>
> There's about 350 uses of pthread symbols in mesa itself, of that there are 56
> unique files containing pthread symbols (some of which are generators), and of
> that there are only 23 unique folders containing pthread symbols. I think that
> getting this right is very doable.
>
> I'll start auditing the meson build to see if there's any place that we're
> missing passing pthreads directly.
>
Guess I should have made it more obvious:

I'm trying to save you (amongst others) the annoyance as things break
- since they will break :-(
It's entirely up to you to decide on the best approach to mitigate or
even avoid that.

HTH
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/8] spirv: Add a prepass to set types on vtn_values

2017-12-12 Thread Rob Herring
On Thu, Dec 7, 2017 at 10:12 AM, Jason Ekstrand  wrote:
> This autogenerated pass will automatically find and set the type field
> on all vtn_values.  This way we always have the type and can use it for
> validation and other checks.
> ---
>  src/compiler/Makefile.nir.am |   4 +
>  src/compiler/nir/meson.build |  11 ++-
>  src/compiler/spirv/spirv_to_nir.c|   6 +-
>  src/compiler/spirv/vtn_gather_types_c.py | 125 
> +++
>  src/compiler/spirv/vtn_private.h |   4 +
>  5 files changed, 148 insertions(+), 2 deletions(-)
>  create mode 100644 src/compiler/spirv/vtn_gather_types_c.py

This is missing the necessary android changes.

Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] glx: Lift sending the MakeCurrent request to top-level code (v2)

2017-12-12 Thread Emil Velikov
On 6 December 2017 at 19:25, Adam Jackson  wrote:
> On Wed, 2017-12-06 at 15:14 +, Emil Velikov wrote:
>
>> > -  if (gc->vtable->bind(gc, oldGC, draw, read) != Success) {
>> > +  if (gc->vtable->bind(gc, gc, draw, read) != Success) {
>> >   __glXSetCurrentContextNull();
>>
>> This line seems inconsistent/wrong.
>>
>> The glXMakeCurrent manpage says "If False is returned, the previously
>> current rendering context and drawable (if any) remain unchanged."
>
> Ugh. That's not really possible to get perfectly right, there are
> unrecoverable states (think MakeCurrent away from a context that's been
> deleted, or whose current drawable is a destroyed window). Still, I
> suppose we should try at least a little.
>
From a quick look delaying the unbind call should address that. I'd
drop the __glXSetCurrentContextNull call for now and keep the rest of
the yak shaving at a later stage.

Just noticed some copy/paste damage

 indirect_bind_context(struct glx_context *gc, struct glx_context *old,
  GLXDrawable draw, GLXDrawable read)
 {

+* cleverness before the GetString calls.
+*/
+   if (state && state->array_state == NULL) {
+  gc->currentDpy = gc->psc->dpy;
+  __glXSetCurrentContext(gc);
+  __glXSetCurrentContext(gc);

__glXSetCurrentContext should be called only once.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallium/util: add u_transfer_helper

2017-12-12 Thread Rob Clark
Add a new helper that drivers can use to emulate various things that
need special handling in particular in transfer_map:

 1) z32_s8x24.. gl/gallium treats this as a single buffer with depth
and stencil interleaved but hardware frequently treats this as
separate z32 and s8 buffers.  Special pack/unpack handling is
needed in transfer_map/unmap to pack/unpack the exposed buffer

 2) fake RGTC.. GPUs designed with GLES in mind, but which can other-
wise do GL3, if native RGTC is not supported it can be emulated
by converting to uncompressed internally, but needs pack/unpack
in transfer_map/unmap

 3) MSAA resolves in the transfer_map() case

v2: add MSAA resolve based on Eric's "gallium: Add helpers for MSAA
resolves in pipe_transfer_map()/unmap()." patch; avoid wrapping
pipe_resource, to make it possible for drivers to use both this
and threaded_context.

Signed-off-by: Rob Clark 
---
 src/gallium/auxiliary/Makefile.sources |   2 +
 src/gallium/auxiliary/meson.build  |   2 +
 src/gallium/auxiliary/util/u_transfer_helper.c | 486 +
 src/gallium/auxiliary/util/u_transfer_helper.h | 132 +++
 src/gallium/include/pipe/p_screen.h|   8 +-
 5 files changed, 629 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/auxiliary/util/u_transfer_helper.c
 create mode 100644 src/gallium/auxiliary/util/u_transfer_helper.h

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index f40c4723fae..a2dae04698c 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -304,6 +304,8 @@ C_SOURCES := \
util/u_tile.h \
util/u_transfer.c \
util/u_transfer.h \
+   util/u_transfer_helper.c \
+   util/u_transfer_helper.h \
util/u_threaded_context.c \
util/u_threaded_context.h \
util/u_threaded_context_calls.h \
diff --git a/src/gallium/auxiliary/meson.build 
b/src/gallium/auxiliary/meson.build
index 3e623fd099f..8c242ec1a05 100644
--- a/src/gallium/auxiliary/meson.build
+++ b/src/gallium/auxiliary/meson.build
@@ -324,6 +324,8 @@ files_libgallium = files(
   'util/u_tile.h',
   'util/u_transfer.c',
   'util/u_transfer.h',
+  'util/u_transfer_helper.c',
+  'util/u_transfer_helper.h',
   'util/u_threaded_context.c',
   'util/u_threaded_context.h',
   'util/u_threaded_context_calls.h',
diff --git a/src/gallium/auxiliary/util/u_transfer_helper.c 
b/src/gallium/auxiliary/util/u_transfer_helper.c
new file mode 100644
index 000..6f7a36a5f6d
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_transfer_helper.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright © 2017 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_screen.h"
+
+#include "util/u_box.h"
+#include "util/u_format.h"
+#include "util/u_format_rgtc.h"
+#include "util/u_format_zs.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer_helper.h"
+
+
+struct u_transfer_helper {
+   const struct u_transfer_vtbl *vtbl;
+   bool separate_z32s8;
+   bool fake_rgtc;
+   bool msaa_map;
+};
+
+static inline bool handle_transfer(struct pipe_resource *prsc)
+{
+   struct u_transfer_helper *helper = prsc->screen->transfer_helper;
+
+   if (helper->vtbl->get_internal_format) {
+  enum pipe_format internal_format =
+helper->vtbl->get_internal_format(prsc);
+  if (internal_format != prsc->format)
+ return true;
+   }
+
+   if (helper->msaa_map && (prsc->nr_samples > 1))
+  return true;
+
+   return false;
+}
+
+/* The pipe_transfer ptr could either be the driver's, or u_transfer,
+ * depending on whether we are intervening or not.  Check handle_transfer()
+ * before dereferencing.
+ */
+struct u_transfer {
+   struct pipe_transfer base;
+   /* Note that in case of MSAA resolve for transfer plus z32s8 or fake rgtc
+* we end up with stacked u_tr

Re: [Mesa-dev] [PATCH 1/3] glx: Move vertex array protocol state into the indirect backend (v2)

2017-12-12 Thread Emil Velikov
On 6 December 2017 at 18:12, Adam Jackson  wrote:
> On Wed, 2017-12-06 at 14:50 +, Emil Velikov wrote:
>
>> > +   * have setup the context, as it needs to query server attributes.
>> > +   *
>> > +   * At the point this is called gc->currentDpy is not initialized
>> > +   * nor is the thread's current context actually set. Hence the
>> > +   * cleverness before the GetString calls.
>> > +   */
>> > +  __GLXattribute *state = gc->client_state_private;
>> > +  if (state && state->array_state == NULL) {
>> > + gc->currentDpy = gc->psc->dpy;
>> > + __glXSetCurrentContext(gc);
>>
>> Unless I'm misreading the SendMakeCurrentRequest rework (patch 2/3)
>> __glXSetCurrentContext() will be called, hence these two lines +
>> respective comment could be omitted.
>
> Pretty sure you're misreading something. This is the ->bind hook, if it
> succeeds then MakeContextCurrent will call __glXSetCurrentContext.
> Since we have not yet returned, we have not yet succeeded, and
> __glXSetCurrentContext has not yet been called, so we must do it
> ourselves.
>
Right my bad. Could swear I saw a __glXSetCurrentContext call before
the ->bind() one.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/4] enable GL_EXT_disjoint_timer_query

2017-12-12 Thread Lionel Landwerlin

Hey Tapani,

We had a bit of a discussion on IRC about the patch 2.
Our per-context timestamps are 36 bits long and so wrap around roughly 
every 90 minutes on gen9.

i965 already knows how to handle the counter wrapping around at 36 bits.
And since there is no way to measure a delta of more than ~90 minutes, I 
don't think patch 2 is necessary.


Otherwise this series looks good to me (you might want someone else to 
double check the API/xml stuff that I'm not super familiar with) :


Reviewed-by: Lionel Landwerlin 


On 20/11/17 07:18, Tapani Pälli wrote:

This series enables mostly existing functionality but adds the disjoint
state required by this extension. WebGL tests seem to utilize desktop
extension GL_EXT_timer_query but I wrote a separate simple Piglit test
for this.

Tapani Pälli (4):
   mesa: add DisjointOperation to gl_shared_state
   i965: set DisjointOperation true on timestamp overflow
   glapi: add GL_EXT_disjoint_timer_query
   mesa: enable GL_EXT_disjoint_timer_query extension

  src/mapi/glapi/gen/es_EXT.xml| 16 
  src/mapi/glapi/gen/gl_API.xml|  4 ++--
  src/mesa/drivers/dri/i965/brw_queryobj.c |  4 
  src/mesa/main/extensions_table.h |  1 +
  src/mesa/main/get.c  | 17 +
  src/mesa/main/get_hash_params.py |  5 +
  src/mesa/main/glheader.h |  4 
  src/mesa/main/mtypes.h   |  9 +
  src/mesa/main/queryobj.c |  3 ++-
  src/mesa/main/robustness.c   |  1 +
  src/mesa/main/tests/dispatch_sanity.cpp  |  5 +
  11 files changed, 66 insertions(+), 3 deletions(-)



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] meson: set opencl flags for r600

2017-12-12 Thread Eric Engestrom
On Monday, 2017-12-11 11:50:01 -0800, Dylan Baker wrote:
> Quoting Eric Engestrom (2017-12-11 07:55:30)
> > On Friday, 2017-12-08 16:27:21 -0800, Dylan Baker wrote:
> > > Signed-off-by: Dylan Baker 
> > 
> > Should come after the current 4/4, but
> > Reviewed-by: Eric Engestrom 
> > 
> > > ---
> > >  src/gallium/drivers/r600/meson.build | 7 +--
> > >  1 file changed, 5 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/src/gallium/drivers/r600/meson.build 
> > > b/src/gallium/drivers/r600/meson.build
> > > index 2132dbb33ad..5899518a2e8 100644
> > > --- a/src/gallium/drivers/r600/meson.build
> > > +++ b/src/gallium/drivers/r600/meson.build
> > > @@ -113,12 +113,15 @@ egd_tables_h = custom_target(
> > >capture : true,
> > >  )
> > >  
> > > -# TODO: compute defines
> > > +r600_c_args = []
> > > +if with_gallium_opencl
> > > +  r600_c_args += '-DHAVE_OPENCL'
> > > +endif
> > >  
> > >  libr600 = static_library(
> > >'r600',
> > >[files_r600, egd_tables_h],
> > > -  c_args : [c_vis_args],
> > > +  c_args : [c_vis_args, r600_c_args],
> > >cpp_args : [cpp_vis_args],
> > >include_directories : [
> > >  inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_amd_common,
> > > -- 
> > > 2.15.1
> > > 
> 
> r600 needs this to work with clover at all. Since it's so minimal, how would 
> you
> feel about just squashing this into 4/4?

You mean r600 would be broken after 4/4 if 3/4 wasn't applied?
I'm OK with squashing them if so.

My issue was just that with this order, the build would be broken
between 3/4 and 4/4 because `with_gallium_opencl` doesn't exist yet.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 06/10] mesa/st/glsl_to_tgsi: Add tracking of ifelse writes in register merging

2017-12-12 Thread Gert Wollny
Improve the life-time evaluation of temporary registers by also tracking
writes in both if and else branches and in up to 32 nested scopes.
As a result the estimated required register life-times can be further
reduced enabling more registers to be merged.

Signed-off-by: Gert Wollny 
---
 .../state_tracker/st_glsl_to_tgsi_temprename.cpp   | 312 +++--
 1 file changed, 292 insertions(+), 20 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
index c4f4622feb..574e43bfe1 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
@@ -102,15 +102,19 @@ public:
int begin() const;
int loop_break_line() const;
 
+   const prog_scope *in_else_scope() const;
const prog_scope *in_ifelse_scope() const;
-   const prog_scope *in_switchcase_scope() const;
+   const prog_scope *in_parent_ifelse_scope() const;
const prog_scope *innermost_loop() const;
const prog_scope *outermost_loop() const;
const prog_scope *enclosing_conditional() const;
 
bool is_loop() const;
bool is_in_loop() const;
+   bool is_switchcase_scope_in_loop() const;
bool is_conditional() const;
+   bool is_child_of(const prog_scope *scope) const;
+   bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const;
 
bool break_is_for_switchcase() const;
bool contains_range_of(const prog_scope& other) const;
@@ -141,25 +145,81 @@ private:
prog_scope *storage;
 };
 
+/* Class to track the access to a component of a temporary register. */
+
 class temp_comp_access {
 public:
temp_comp_access();
+
void record_read(int line, prog_scope *scope);
void record_write(int line, prog_scope *scope);
lifetime get_required_lifetime();
 private:
void propagate_lifetime_to_dominant_write_scope();
+   bool conditional_ifelse_write_in_loop() const;
+
+   void record_ifelse_write(const prog_scope& scope);
+   void record_if_write(const prog_scope& scope);
+   void record_else_write(const prog_scope& scope);
 
prog_scope *last_read_scope;
prog_scope *first_read_scope;
prog_scope *first_write_scope;
+
int first_write;
int last_read;
int last_write;
int first_read;
-   bool keep_for_full_loop;
+
+   /* This member variable tracks the current resolution of conditional writing
+* to this temporary in IF/ELSE clauses.
+*
+* The initial value "conditionality_untouched" indicates that this
+* temporary has not yet been written to within an if clause.
+*
+* A positive (other than "conditionality_untouched") number refers to the
+* last loop id for which the write was resolved as unconditional. With each
+* new loop this value will be overwritten by "conditionality_unresolved"
+* on entering the first IF clause writing this temporary.
+*
+* The value "conditionality_unresolved" indicates that no resolution has
+* been achieved so far. If the variable is set to this value at the end of
+* the processing of the whole shader it also indicates a conditional write.
+*
+* The value "write_is_conditional" marks that the variable is written
+* conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
+* least one loop.
+*/
+   int conditionality_in_loop_id;
+
+   /* Helper constants to make the tracking code more readable. */
+   static const int write_is_conditional = -1;
+   static const int conditionality_unresolved = 0;
+   static const int conditionality_untouched;
+
+   /* A bit field tracking the nesting levels of if-else clauses where the
+* temporary has (so far) been written to in the if branch, but not in the
+* else branch.
+*/
+   unsigned int if_scope_write_flags;
+
+   int next_ifelse_nesting_depth;
+   static const int supported_ifelse_nesting_depth = 32;
+
+   /* Tracks the last if scope in which the temporary was written to
+* without a write in the corresponding else branch. Is also used
+* to track read-before-write in the according scope.
+*/
+   const prog_scope *current_unpaired_if_write_scope;
+
+   /* Flag to resolve read-before-write in the else scope. */
+   bool was_written_in_current_else_scope;
 };
 
+const int
+temp_comp_access::conditionality_untouched = numeric_limits::max();
+
+/* Class to track the access to all components of a temporary register. */
 class temp_access {
 public:
temp_access();
@@ -262,6 +322,32 @@ const prog_scope *prog_scope::outermost_loop() const
return loop;
 }
 
+bool prog_scope::is_child_of_ifelse_id_sibling(const prog_scope *scope) const
+{
+   const prog_scope *my_parent = in_parent_ifelse_scope();
+   while (my_parent) {
+  /* is a direct child? */
+  if (my_parent == scope)
+ return false;
+  /* is a child of the conditions sibling? */
+  if (my_parent->id() == scope->id())
+ return true;
+  my_parent = my_parent->in_pa

Re: [Mesa-dev] [Mesa-stable] [PATCH 1/2] i965/bufmgr: Add a helper to mark a BO as external

2017-12-12 Thread Emil Velikov
On 11 December 2017 at 22:03, Jason Ekstrand  wrote:
> On Mon, Dec 11, 2017 at 12:08 PM, Emil Velikov 
> wrote:
>>
>> On 21 November 2017 at 00:13, Andres Gomez  wrote:
>> > Jason, this nominated series landed without mentioning any specific
>> > stable queue.
>> >
>> > From what I'm seeing, both depend on 2c4097aff1b which didn't make it
>> > for 17.2 so I'm dropping them for that queue.
>> >
>> This is a preparatory patch for the follow-up commit.
>> With the latter addressing issue caused by
>> 4b1e70cc57d7ff5f465544644b2180dee1490cee - only available in
>> 17.4.0-dev.
>>
>> I believe the 17.2 and 17.3 series should be safe ;-)
>
>
> This bug goes all the way back.  It interacts with the patch mentioned but
> is not the same thing.  This patch (and the subsequent one) should probably
> go back as far as brw_bufmgr does.
Having a closer look at the patches related to external buffers, shows
the following:

2c4097aff1b i965: Only put external handles into the handle ht
17.3 only

d7a19d69ebc i965: Use PTE MOCS for all external buffers
17.4 + 17.3
4b1e70cc57d i965: Switch over to fully external-or-not MOCS scheme
17.4 only

344252a27f8 i965/bufmgr: Add a helper to mark a BO as external
0a6a137eb27 i965: Mark BOs as external when we export their handle
These fixes

With the above in mind, it seems that 4b1e70cc57d should have been
tagged for 17.3?

On the 17.2 front: Andres is right - commit 2c4097aff1b seems like a
bare minimum for this series.
Considering how well into the 17.2 series we are, I'm a bit worried
about picking it :-\

What do you guys think - are we safe to leave the lot out for 17.2 or
is it an absolute must?

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 09/10] mesa/st/tests: Add tests for lifetime tracking with indirect addressing

2017-12-12 Thread Gert Wollny
 Add a code line type that accepts one layer of indirect addressing and
 add tests to check that temporary register access used for indirect
 addressing is accounted for in the lifetime estimation.

Signed-off-by: Gert Wollny 
---
 src/mesa/state_tracker/tests/st_tests_common.cpp   | 95 +-
 src/mesa/state_tracker/tests/st_tests_common.h | 12 ++-
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 86 
 3 files changed, 189 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/tests/st_tests_common.cpp 
b/src/mesa/state_tracker/tests/st_tests_common.cpp
index 91fd39343f..c6563472a8 100644
--- a/src/mesa/state_tracker/tests/st_tests_common.cpp
+++ b/src/mesa/state_tracker/tests/st_tests_common.cpp
@@ -87,6 +87,30 @@ FakeCodeline::FakeCodeline(unsigned _op, const 
vector>& _dst,
});
 }
 
+FakeCodeline::FakeCodeline(unsigned _op, const vector>& 
_dst,
+   const vector>& _src,
+   const vector>&_to, RA 
with_reladdr):
+   op(_op),
+   max_temp_id(0)
+{
+   (void)with_reladdr;
+
+   transform(_dst.begin(), _dst.end(), std::back_inserter(dst),
+ [this](const tuple& r) {
+  return create_dst_register(r);
+   });
+
+   transform(_src.begin(), _src.end(), std::back_inserter(src),
+ [this](const tuple& r) {
+  return create_src_register(r);
+   });
+
+   transform(_to.begin(), _to.end(), std::back_inserter(tex_offsets),
+ [this](const tuple& r) {
+  return create_src_register(r);
+   });
+}
+
 FakeCodeline::FakeCodeline(const glsl_to_tgsi_instruction& instr):
op(instr.op),
max_temp_id(0)
@@ -193,10 +217,43 @@ st_src_reg FakeCodeline::create_src_register(int src_idx, 
gl_register_file file)
return retval;
 }
 
-st_dst_reg FakeCodeline::create_dst_register(int dst_idx)
+st_src_reg *FakeCodeline::create_rel_src_register(int idx)
 {
-   return create_dst_register(dst_idx, dst_idx < 0 ?
- PROGRAM_OUTPUT : PROGRAM_TEMPORARY);
+   st_src_reg *retval = ralloc(mem_ctx, st_src_reg);
+   *retval = st_src_reg(PROGRAM_TEMPORARY, idx, GLSL_TYPE_INT);
+   if (max_temp_id < idx)
+  max_temp_id = idx;
+   return retval;
+}
+
+st_src_reg FakeCodeline::create_src_register(const tuple& src)
+{
+   int src_idx = std::get<0>(src);
+   int relidx1 = std::get<1>(src);
+   int relidx2 = std::get<2>(src);
+
+   gl_register_file file = PROGRAM_TEMPORARY;
+   if (src_idx < 0)
+  file = PROGRAM_OUTPUT;
+   else if (relidx1 || relidx2) {
+  file = PROGRAM_ARRAY;
+   }
+
+   st_src_reg retval = create_src_register(src_idx, file);
+   if (src_idx >= 0) {
+  if (relidx1 || relidx2) {
+ retval.array_id = 1;
+
+ if (relidx1)
+retval.reladdr = create_rel_src_register(relidx1);
+ if (relidx2) {
+retval.reladdr2 = create_rel_src_register(relidx2);
+retval.has_index2 = true;
+retval.index2D = 10;
+ }
+  }
+   }
+   return retval;
 }
 
 st_dst_reg FakeCodeline::create_dst_register(int dst_idx,int writemask)
@@ -215,6 +272,12 @@ st_dst_reg FakeCodeline::create_dst_register(int 
dst_idx,int writemask)
return st_dst_reg(file, writemask, GLSL_TYPE_INT, idx);
 }
 
+st_dst_reg FakeCodeline::create_dst_register(int dst_idx)
+{
+   return create_dst_register(dst_idx, dst_idx < 0 ?
+ PROGRAM_OUTPUT : PROGRAM_TEMPORARY);
+}
+
 st_dst_reg FakeCodeline::create_dst_register(int dst_idx, gl_register_file 
file)
 {
st_dst_reg retval;
@@ -233,6 +296,32 @@ st_dst_reg FakeCodeline::create_dst_register(int dst_idx, 
gl_register_file file)
return retval;
 }
 
+st_dst_reg FakeCodeline::create_dst_register(const tuple& dst)
+{
+   int dst_idx = std::get<0>(dst);
+   int relidx1 = std::get<1>(dst);
+   int relidx2 = std::get<2>(dst);
+
+   gl_register_file file = PROGRAM_TEMPORARY;
+   if (dst_idx < 0)
+  file = PROGRAM_OUTPUT;
+   else if (relidx1 || relidx2) {
+  file = PROGRAM_ARRAY;
+   }
+   st_dst_reg retval = create_dst_register(dst_idx, file);
+
+   if (relidx1 || relidx2) {
+  if (relidx1)
+ retval.reladdr = create_rel_src_register(relidx1);
+  if (relidx2) {
+ retval.reladdr2 = create_rel_src_register(relidx2);
+ retval.has_index2 = true;
+ retval.index2D = 10;
+  }
+   }
+   return retval;
+}
+
 glsl_to_tgsi_instruction *FakeCodeline::get_codeline() const
 {
glsl_to_tgsi_instruction *next_instr = new(mem_ctx) 
glsl_to_tgsi_instruction();
diff --git a/src/mesa/state_tracker/tests/st_tests_common.h 
b/src/mesa/state_tracker/tests/st_tests_common.h
index cea8a5ce08..2e18832923 100644
--- a/src/mesa/state_tracker/tests/st_tests_common.h
+++ b/src/mesa/state_tracker/tests/st_tests_common.h
@@ -35,9 +35,12 @@
 /* Use this to make the compiler pick the swizzle constructor below */
 struct SWZ {};
 
+/* Use this to make the compiler pick the constructor with reladdr below */
+struc

[Mesa-dev] [PATCH v4 10/10] mesa/st/glsl_to_tgsi: remove now unneeded assert.

2017-12-12 Thread Gert Wollny
With the implementation of the tracking of the registers used in reladdr
asserting that a driver calling merge_register() uses the address register
is no longer needed.

Signed-off-by: Gert Wollny 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 8eeae86dab..88719fb94c 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5287,7 +5287,6 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
 void
 glsl_to_tgsi_visitor::merge_registers(void)
 {
-   assert(need_uarl);
struct lifetime *lifetimes =
  rzalloc_array(mem_ctx, struct lifetime, this->next_temp);
 
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 08/10] mesa/st/glsl_to_tgsi: Add tracking of indirect addressing registers

2017-12-12 Thread Gert Wollny
So far indirect addressing was not tracked to estimate the temporary
life time, and it was not needed, because code to load the address
registers was always emitted, eliminating the reladdr* handles in the
past glsl-to-tgsi stages. Now, with Marek's patch allowing any 1D register
to be used for addressing on some hardware, this changed, and
the tracking becomes necessary.

Because the registers have no direct indication on whether the reladdr* was
already loaded into an address register, the temporaries in reladdr* are
always tracked as reads. This may result in a slight over-estimation of the
lifetime in the cases when the load to the address register was emitted.

v2: no changes
v3: Use the debug_log variable instead of writing directly to std::cerr in
debugging output.

Reviewed-by: Nicolai Hähnle  (v1)
---
 .../state_tracker/st_glsl_to_tgsi_temprename.cpp   | 108 ++---
 1 file changed, 74 insertions(+), 34 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
index 574e43bfe1..8a2e6fc646 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
@@ -873,6 +873,69 @@ public:
}
 };
 
+class access_recorder {
+public:
+   access_recorder(int _ntemps);
+   ~access_recorder();
+
+   void record_read(const st_src_reg& src, int line, prog_scope *scope);
+   void record_write(const st_dst_reg& src, int line, prog_scope *scope);
+
+   void get_required_lifetimes(struct lifetime *lifetimes);
+private:
+
+   int ntemps;
+   temp_access *acc;
+
+};
+
+access_recorder::access_recorder(int _ntemps):
+   ntemps(_ntemps)
+{
+   acc = new temp_access[ntemps];
+}
+
+access_recorder::~access_recorder()
+{
+   delete[] acc;
+}
+
+void access_recorder::record_read(const st_src_reg& src, int line,
+  prog_scope *scope)
+{
+   if (src.file == PROGRAM_TEMPORARY)
+  acc[src.index].record_read(line, scope, src.swizzle);
+
+   if (src.reladdr)
+  record_read(*src.reladdr, line, scope);
+   if (src.reladdr2)
+  record_read(*src.reladdr2, line, scope);
+}
+
+void access_recorder::record_write(const st_dst_reg& dst, int line,
+   prog_scope *scope)
+{
+   if (dst.file == PROGRAM_TEMPORARY)
+  acc[dst.index].record_write(line, scope, dst.writemask);
+
+   if (dst.reladdr)
+  record_read(*dst.reladdr, line, scope);
+   if (dst.reladdr2)
+  record_read(*dst.reladdr2, line, scope);
+}
+
+void access_recorder::get_required_lifetimes(struct lifetime *lifetimes)
+{
+   RENAME_DEBUG(debug_log << "= lifetimes ==\n");
+   for(int i = 0; i < ntemps; ++i) {
+  RENAME_DEBUG(debug_log<< setw(4) << i);
+  lifetimes[i] = acc[i].get_required_lifetime();
+  RENAME_DEBUG(debug_log << ": [" << lifetimes[i].begin << ", "
+<< lifetimes[i].end << "]\n");
+   }
+   RENAME_DEBUG(debug_log << "==\n\n");
+}
+
 }
 
 #ifndef NDEBUG
@@ -893,7 +956,6 @@ get_temp_registers_required_lifetimes(void *mem_ctx, 
exec_list *instructions,
int if_id = 1;
int switch_id = 0;
bool is_at_end = false;
-   bool ok = true;
int n_scopes = 1;
 
/* Count scopes to allocate the needed space without the need for
@@ -911,7 +973,8 @@ get_temp_registers_required_lifetimes(void *mem_ctx, 
exec_list *instructions,
}
 
prog_scope_storage scopes(mem_ctx, n_scopes);
-   temp_access *acc = new temp_access[ntemps];
+
+   access_recorder access(ntemps);
 
prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line);
 
@@ -940,9 +1003,7 @@ get_temp_registers_required_lifetimes(void *mem_ctx, 
exec_list *instructions,
   case TGSI_OPCODE_IF:
   case TGSI_OPCODE_UIF: {
  assert(num_inst_src_regs(inst) == 1);
- const st_src_reg& src = inst->src[0];
- if (src.file == PROGRAM_TEMPORARY)
-acc[src.index].record_read(line, cur_scope, src.swizzle);
+ access.record_read(inst->src[0], line, cur_scope);
  cur_scope = scopes.create(cur_scope, if_branch, if_id++,
cur_scope->nesting_depth() + 1, line + 1);
  break;
@@ -968,14 +1029,12 @@ get_temp_registers_required_lifetimes(void *mem_ctx, 
exec_list *instructions,
   }
   case TGSI_OPCODE_SWITCH: {
  assert(num_inst_src_regs(inst) == 1);
- const st_src_reg& src = inst->src[0];
  prog_scope *scope = scopes.create(cur_scope, switch_body, switch_id++,
cur_scope->nesting_depth() + 1, 
line);
  /* We record the read only for the SWITCH statement itself, like it
   * is used by the only consumer of TGSI_OPCODE_SWITCH in tgsi_exec.c.
   */
- if (src.file == PROGRAM_TEMPORARY)
-acc[src.index].record_read(line, cur_scope, src.swizzle);
+ access.record_read(inst

[Mesa-dev] [PATCH v4 07/10] mesa/st/tests: Add tests for improved tracking of temporaries

2017-12-12 Thread Gert Wollny
Additional tests are added that check the tracking of access to temporaries
in if-else branches.

Signed-off-by: Gert Wollny 
---
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 493 -
 1 file changed, 486 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp 
b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
index 84a91f5988..a1dc28a387 100644
--- a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
+++ b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
@@ -164,7 +164,7 @@ TEST_F(LifetimeEvaluatorExactTest, MoveInIfInNestedLoop)
  * - value must survive from first write to last read in loop
  * for now we only check that the minimum life time is correct.
  */
-TEST_F(LifetimeEvaluatorAtLeastTest, WriteInIfAndElseInLoop)
+TEST_F(LifetimeEvaluatorExactTest, WriteInIfAndElseInLoop)
 {
const vector code = {
   { TGSI_OPCODE_MOV, {1}, {in0}, {}},
@@ -206,6 +206,137 @@ TEST_F(LifetimeEvaluatorExactTest, 
WriteInIfAndElseReadInElseInLoop)
run (code, temp_lt_expect({{-1,-1}, {0,9}, {1,9}, {7,10}}));
 }
 
+
+/* Test that a write in ELSE path only in loop is properly tracked:
+ * In loop if/else value written in else path and read outside
+ * - value must survive the whole loop.
+ */
+TEST_F(LifetimeEvaluatorExactTest, WriteInElseReadInLoop)
+{
+   const vector code = {
+  { TGSI_OPCODE_MOV, {1}, {in0}, {}},
+  { TGSI_OPCODE_BGNLOOP },
+  {   TGSI_OPCODE_IF, {}, {1}, {}},
+  { TGSI_OPCODE_UADD, {2}, {1,in0}, {}},
+  {   TGSI_OPCODE_ELSE },
+  { TGSI_OPCODE_ADD, {3}, {1,2}, {}},
+  {   TGSI_OPCODE_ENDIF},
+  {   TGSI_OPCODE_UADD, {1}, {3,in1}, {}},
+  { TGSI_OPCODE_ENDLOOP },
+  { TGSI_OPCODE_MOV, {out0}, {1}, {}},
+  { TGSI_OPCODE_END}
+   };
+   run (code, temp_lt_expect({{-1,-1}, {0,9}, {1,8}, {1,8}}));
+}
+
+/* Test that tracking a second write in an ELSE path is not attributed
+ * to the IF path: In loop if/else value written in else path twice and
+ * read outside - value must survive the whole loop
+ */
+TEST_F(LifetimeEvaluatorExactTest, WriteInElseTwiceReadInLoop)
+{
+   const vector code = {
+  { TGSI_OPCODE_MOV, {1}, {in0}, {}},
+  { TGSI_OPCODE_BGNLOOP },
+  {   TGSI_OPCODE_IF, {}, {1}, {}},
+  { TGSI_OPCODE_UADD, {2}, {1,in0}, {}},
+  {   TGSI_OPCODE_ELSE },
+  { TGSI_OPCODE_ADD, {3}, {1,2}, {}},
+  { TGSI_OPCODE_ADD, {3}, {1,3}, {}},
+  {   TGSI_OPCODE_ENDIF},
+  {   TGSI_OPCODE_UADD, {1}, {3,in1}, {}},
+  { TGSI_OPCODE_ENDLOOP },
+  { TGSI_OPCODE_MOV, {out0}, {1}, {}},
+  { TGSI_OPCODE_END}
+   };
+   run (code, temp_lt_expect({{-1,-1}, {0,10}, {1,9}, {1,9}}));
+}
+
+/* Test that the IF and ELSE scopes from different IF/ELSE pairs are not
+ * merged: In loop if/else value written in if, and then in different else path
+ * and read outside - value must survive the whole loop
+ */
+TEST_F(LifetimeEvaluatorExactTest, WriteInOneIfandInAnotherElseInLoop)
+{
+   const vector code = {
+  { TGSI_OPCODE_MOV, {1}, {in0}, {}},
+  { TGSI_OPCODE_BGNLOOP },
+  {   TGSI_OPCODE_IF, {}, {1}, {}},
+  { TGSI_OPCODE_UADD, {2}, {1,in0}, {}},
+  {   TGSI_OPCODE_ENDIF},
+  {   TGSI_OPCODE_IF, {}, {1}, {}},
+  {   TGSI_OPCODE_ELSE },
+  { TGSI_OPCODE_ADD, {2}, {1,1}, {}},
+  {   TGSI_OPCODE_ENDIF},
+  {   TGSI_OPCODE_UADD, {1}, {2,in1}, {}},
+  { TGSI_OPCODE_ENDLOOP },
+  { TGSI_OPCODE_MOV, {out0}, {1}, {}},
+  { TGSI_OPCODE_END}
+   };
+   run (code, temp_lt_expect({{-1,-1}, {0,11}, {1,10}}));
+}
+
+/* Test that with a new loop the resolution of the IF/ELSE write conditionality
+ * is restarted: In first loop value is written in both if and else, in second
+ * loop value is written only in if - must survive the second loop.
+ * However, the tracking is currently not able to restrict the lifetime
+ * in the first loop, hence the "AtLeast" test.
+ */
+TEST_F(LifetimeEvaluatorAtLeastTest, 
UnconditionalInFirstLoopConditionalInSecond)
+{
+   const vector code = {
+  { TGSI_OPCODE_MOV, {1}, {in0}, {}},
+  { TGSI_OPCODE_BGNLOOP },
+  {   TGSI_OPCODE_IF, {}, {1}, {}},
+  { TGSI_OPCODE_UADD, {2}, {1,in0}, {}},
+  {   TGSI_OPCODE_ELSE },
+  { TGSI_OPCODE_UADD, {2}, {1,in1}, {}},
+  {   TGSI_OPCODE_ENDIF},
+  { TGSI_OPCODE_ENDLOOP },
+  { TGSI_OPCODE_BGNLOOP },
+  {   TGSI_OPCODE_IF, {}, {1}, {}},
+  { TGSI_OPCODE_ADD, {2}, {in0,1}, {}},
+  {   TGSI_OPCODE_ENDIF},
+  {   TGSI_OPCODE_UADD, {1}, {2,in1}, {}},
+  { TGSI_OPCODE_ENDLOOP },
+  { TGSI_OPCODE_MOV, {out0}, {1}, {}},
+  { TGSI_OPCODE_END}
+   };
+   run (code, temp_lt_expect({{-1,-1}, {0,14}, {3,13}}));
+}
+
+/* Test that with a new loop the resolution of the IF/ELSE write conditionality
+ * is restarted, and also takes care of write before read in else scope:
+ * In first loop value is written in both if and else, i

[Mesa-dev] [PATCH v4 05/10] mesa/st/tests: cleanup whitespace usage and correct some comments

2017-12-12 Thread Gert Wollny
Signed-off-by: Gert Wollny 
---
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 127 ++---
 1 file changed, 63 insertions(+), 64 deletions(-)

diff --git a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp 
b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
index 4f226429af..84a91f5988 100644
--- a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
+++ b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
@@ -183,9 +183,9 @@ TEST_F(LifetimeEvaluatorAtLeastTest, WriteInIfAndElseInLoop)
run (code, temp_lt_expect({{-1,-1}, {0,9}, {3,7}, {7,10}}));
 }
 
-/* In loop if/else value written in both path, read in else path
- * before write and also read later
- * - value must survive the whole loop
+/* Test that read before write in ELSE path is properly tracked:
+ * In loop if/else value written in both path but read in else path
+ * before write and also read later - value must survive the whole loop.
  */
 TEST_F(LifetimeEvaluatorExactTest, WriteInIfAndElseReadInElseInLoop)
 {
@@ -245,9 +245,9 @@ TEST_F(LifetimeEvaluatorExactTest, 
ReadInLoopInIfBeforeWriteAndLifeToTheEnd)
run (code, temp_lt_expect({{-1,-1}, {0,6}}));
 }
 
-/* In loop if/else read in one path before written in the same loop
- * read after the loop, value must survivethe whole loop and
- * to the read.
+/* In loop read before written in the same loop read after the loop,
+ * value must survive the whole loop and to the read.
+ * This is kind of undefined behaviour though ...
  */
 TEST_F(LifetimeEvaluatorExactTest, ReadInLoopBeforeWriteAndLifeToTheEnd)
 {
@@ -580,7 +580,6 @@ TEST_F(LifetimeEvaluatorExactTest, 
LoopWithReadWriteInSwitchDifferentCaseFallThr
run (code, temp_lt_expect({{-1,-1}, {0,8}}));
 }
 
-
 /* Here we read and write from an to the same temp in the same instruction,
  * but the read is conditional (select operation), hence the lifetime must
  * start with the first write.
@@ -588,21 +587,21 @@ TEST_F(LifetimeEvaluatorExactTest, 
LoopWithReadWriteInSwitchDifferentCaseFallThr
 TEST_F(LifetimeEvaluatorExactTest, WriteSelectFromSelf)
 {
const vector code = {
-  {TGSI_OPCODE_USEQ, {5}, {in0,in1}, {}},
-  {TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
-  {TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
-  {TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
-  {TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
-  {TGSI_OPCODE_FSLT, {2}, {1,in1}, {}},
-  {TGSI_OPCODE_UIF, {}, {2}, {}},
-  {  TGSI_OPCODE_MOV, {3}, {in1}, {}},
-  {TGSI_OPCODE_ELSE},
-  {  TGSI_OPCODE_MOV, {4}, {in1}, {}},
-  {  TGSI_OPCODE_MOV, {4}, {4}, {}},
-  {  TGSI_OPCODE_MOV, {3}, {4}, {}},
-  {TGSI_OPCODE_ENDIF},
-  {TGSI_OPCODE_MOV, {out1}, {3}, {}},
-  {TGSI_OPCODE_END}
+  { TGSI_OPCODE_USEQ, {5}, {in0,in1}, {}},
+  { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
+  { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
+  { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
+  { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
+  { TGSI_OPCODE_FSLT, {2}, {1,in1}, {}},
+  { TGSI_OPCODE_UIF, {}, {2}, {}},
+  {   TGSI_OPCODE_MOV, {3}, {in1}, {}},
+  { TGSI_OPCODE_ELSE},
+  {   TGSI_OPCODE_MOV, {4}, {in1}, {}},
+  {   TGSI_OPCODE_MOV, {4}, {4}, {}},
+  {   TGSI_OPCODE_MOV, {3}, {4}, {}},
+  { TGSI_OPCODE_ENDIF},
+  { TGSI_OPCODE_MOV, {out1}, {3}, {}},
+  { TGSI_OPCODE_END}
};
run (code, temp_lt_expect({{-1,-1}, {1,5}, {5,6}, {7,13}, {9,11}, {0,4}}));
 }
@@ -1162,21 +1161,21 @@ TEST_F(RegisterRemappingTest, 
RegisterRemappingMergeZeroLifetimeRegisters)
 TEST_F(RegisterLifetimeAndRemappingTest, LifetimeAndRemapping)
 {
const vector code = {
-  {TGSI_OPCODE_USEQ, {5}, {in0,in1}, {}},
-  {TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
-  {TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
-  {TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
-  {TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
-  {TGSI_OPCODE_FSLT, {2}, {1,in1}, {}},
-  {TGSI_OPCODE_UIF, {}, {2}, {}},
-  {  TGSI_OPCODE_MOV, {3}, {in1}, {}},
-  {TGSI_OPCODE_ELSE},
-  {  TGSI_OPCODE_MOV, {4}, {in1}, {}},
-  {  TGSI_OPCODE_MOV, {4}, {4}, {}},
-  {  TGSI_OPCODE_MOV, {3}, {4}, {}},
-  {TGSI_OPCODE_ENDIF},
-  {TGSI_OPCODE_MOV, {out1}, {3}, {}},
-  {TGSI_OPCODE_END}
+  { TGSI_OPCODE_USEQ, {5}, {in0,in1}, {}},
+  { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
+  { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
+  { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
+  { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}},
+  { TGSI_OPCODE_FSLT, {2}, {1,in1}, {}},
+  { TGSI_OPCODE_UIF, {}, {2}, {}},
+  {   TGSI_OPCODE_MOV, {3}, {in1}, {}},
+  { TGSI_OPCODE_ELSE},
+  {   TGSI_OPCODE_MOV, {4}, {in1}, {}},
+  {   TGSI_OPCODE_MOV, {4}, {4}, {}},
+  {   TGSI_OPCODE_MOV, {3}, {4}, {}},
+  { TGSI_OPCODE_ENDIF},
+  { TGSI_OPCODE_MOV, {out1}, {3}, {}},
+  { TGSI_OPCODE_END}
};
run (code, vector({0,1,5,5,1,5}));
 }
@@ -1184,15 +1183,15 @@ TEST_F(Register

[Mesa-dev] [PATCH v4 04/10] mesa/st/tests: unify MockCodeLine* classes

2017-12-12 Thread Gert Wollny
 * Merge the classes MockCodeLine and MockCodelineWithSwizzle into
   one, and  refactor tests accordingly.
 * Change memory allocations to use ralloc* interface.

 v4:
 * move the test classes into a convenience library
 * rename the Mock* classes to Fake* since they are not really
   Mocks
 * Base assertion of correct number of src and dst registers in tests
   on what the operation actually expects
 * Fix number of destinations in one test
 * Rename typedef "expectation" to "temp_lt_expect"

Signed-off-by: Gert Wollny 
---
 src/mesa/state_tracker/tests/Makefile.am   |   7 +
 src/mesa/state_tracker/tests/st_tests_common.cpp   | 397 
 src/mesa/state_tracker/tests/st_tests_common.h | 163 +
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 720 +
 4 files changed, 739 insertions(+), 548 deletions(-)
 create mode 100644 src/mesa/state_tracker/tests/st_tests_common.cpp
 create mode 100644 src/mesa/state_tracker/tests/st_tests_common.h

diff --git a/src/mesa/state_tracker/tests/Makefile.am 
b/src/mesa/state_tracker/tests/Makefile.am
index 6c58d36769..dcd396918d 100644
--- a/src/mesa/state_tracker/tests/Makefile.am
+++ b/src/mesa/state_tracker/tests/Makefile.am
@@ -18,8 +18,13 @@ AM_CPPFLAGS = \
 if HAVE_STD_CXX11
 TESTS = st-renumerate-test
 check_PROGRAMS = st-renumerate-test
+
+noinst_LIBRARIES = libmesa-st-tests-common.a
 endif
 
+libmesa_st_tests_common_a_SOURCES = \
+   st_tests_common.cpp
+
 st_renumerate_test_SOURCES =   \
test_glsl_to_tgsi_lifetime.cpp
 
@@ -27,6 +32,7 @@ st_renumerate_test_LDFLAGS = \
$(LLVM_LDFLAGS)
 
 st_renumerate_test_LDADD = \
+   libmesa-st-tests-common.a \
$(top_builddir)/src/mesa/libmesagallium.la \
$(top_builddir)/src/mapi/shared-glapi/libglapi.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
@@ -34,3 +40,4 @@ st_renumerate_test_LDADD = \
$(top_builddir)/src/gtest/libgtest.la \
$(GALLIUM_COMMON_LIB_DEPS) \
$(LLVM_LIBS)
+
diff --git a/src/mesa/state_tracker/tests/st_tests_common.cpp 
b/src/mesa/state_tracker/tests/st_tests_common.cpp
new file mode 100644
index 00..91fd39343f
--- /dev/null
+++ b/src/mesa/state_tracker/tests/st_tests_common.cpp
@@ -0,0 +1,397 @@
+/*
+ * Copyright © 2017 Gert Wollny
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "st_tests_common.h"
+
+#include 
+#include 
+#include 
+#include 
+
+
+
+#include 
+
+#include 
+#include 
+
+using std::vector;
+using std::pair;
+using std::make_pair;
+using std::transform;
+using std::copy;
+using std::tuple;
+
+
+/* Implementation of helper and test classes */
+void *FakeCodeline::mem_ctx = nullptr;
+
+FakeCodeline::FakeCodeline(unsigned _op, const vector& _dst,
+   const vector& _src, const vector&_to):
+   op(_op),
+   max_temp_id(0)
+{
+   transform(_dst.begin(), _dst.end(), std::back_inserter(dst),
+ [this](int i) { return create_dst_register(i);});
+
+   transform(_src.begin(), _src.end(), std::back_inserter(src),
+ [this](int i) { return create_src_register(i);});
+
+   transform(_to.begin(), _to.end(), std::back_inserter(tex_offsets),
+ [this](int i) { return create_src_register(i);});
+
+}
+
+FakeCodeline::FakeCodeline(unsigned _op, const vector>& _dst,
+   const vector>& _src,
+   const vector>&_to,
+   SWZ with_swizzle):
+   op(_op),
+   max_temp_id(0)
+{
+   (void)with_swizzle;
+
+   transform(_dst.begin(), _dst.end(), std::back_inserter(dst),
+ [this](pair r) {
+  return create_dst_register(r.first, r.second);
+   });
+
+   transform(_src.begin(), _src.end(), std::back_inserter(src),
+ [this](const pair& r) {
+  return create_src_register(r.first, r.second);
+   });
+
+   transform(_to.begin(), _to.end(), std::ba

[Mesa-dev] [PATCH v4 01/10] mesa/program: Add missing file types to _mesa_register_file_name

2017-12-12 Thread Gert Wollny
This is useful for debugging in glsl_to_tgsi. 

Signed-off-by: Gert Wollny 
---
 src/mesa/program/prog_print.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index 9462510f3a..b273bbf4c8 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -50,12 +50,14 @@ _mesa_register_file_name(gl_register_file f)
switch (f) {
case PROGRAM_TEMPORARY:
   return "TEMP";
-   case PROGRAM_STATE_VAR:
-  return "STATE";
+   case PROGRAM_ARRAY:
+  return "ARRAY";
case PROGRAM_INPUT:
   return "INPUT";
case PROGRAM_OUTPUT:
   return "OUTPUT";
+   case PROGRAM_STATE_VAR:
+  return "STATE";
case PROGRAM_CONSTANT:
   return "CONST";
case PROGRAM_UNIFORM:
@@ -68,6 +70,16 @@ _mesa_register_file_name(gl_register_file f)
   return "SYSVAL";
case PROGRAM_UNDEFINED:
   return "UNDEFINED";
+   case PROGRAM_IMMEDIATE:
+  return "IMM";
+   case PROGRAM_BUFFER:
+  return "BUFFER";
+   case PROGRAM_MEMORY:
+  return "MEMORY";
+   case PROGRAM_IMAGE:
+  return "IMAGE";
+   case PROGRAM_HW_ATOMIC:
+  return "HWATOMIC";
default:
   {
  static char s[20];
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 03/10] mesa/st/tests: Fix zero-byte allocation leaks

2017-12-12 Thread Gert Wollny
Don't allocate a zero-sized array, when no texture offsets are given.

Reviewed-by: Nicolai Hähnle 
Signed-off-by: Gert Wollny 
---
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 23 +++---
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp 
b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
index 93f4020ebf..d0ac8b1020 100644
--- a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
+++ b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
@@ -1374,10 +1374,14 @@ MockShader::MockShader(const 
vector& source):
  next_instr->dst[k] = create_dst_register(i.dst[k].first, 
i.dst[k].second);
   }
   next_instr->tex_offset_num_offset = i.tex_offsets.size();
-  next_instr->tex_offsets = new st_src_reg[i.tex_offsets.size()];
-  for (unsigned k = 0; k < i.tex_offsets.size(); ++k) {
- next_instr->tex_offsets[k] = 
create_src_register(i.tex_offsets[k].first,
-  
i.tex_offsets[k].second);
+  if (next_instr->tex_offset_num_offset > 0) {
+ next_instr->tex_offsets = new st_src_reg[i.tex_offsets.size()];
+ for (unsigned k = 0; k < i.tex_offsets.size(); ++k) {
+next_instr->tex_offsets[k] = 
create_src_register(i.tex_offsets[k].first,
+ 
i.tex_offsets[k].second);
+ }
+  } else {
+ next_instr->tex_offsets = nullptr;
   }
   program->push_tail(next_instr);
}
@@ -1407,10 +1411,15 @@ MockShader::MockShader(const vector& 
source):
  next_instr->dst[k] = create_dst_register(i.dst[k]);
   }
   next_instr->tex_offset_num_offset = i.tex_offsets.size();
-  next_instr->tex_offsets = new st_src_reg[i.tex_offsets.size()];
-  for (unsigned k = 0; k < i.tex_offsets.size(); ++k) {
- next_instr->tex_offsets[k] = create_src_register(i.tex_offsets[k]);
+  if (next_instr->tex_offset_num_offset > 0) {
+ next_instr->tex_offsets = new st_src_reg[i.tex_offsets.size()];
+ for (unsigned k = 0; k < i.tex_offsets.size(); ++k) {
+next_instr->tex_offsets[k] = create_src_register(i.tex_offsets[k]);
+ }
+  } else {
+ next_instr->tex_offsets = nullptr;
   }
+
   program->push_tail(next_instr);
}
++num_temps;
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 02/10] mesa/st/glsl_to_tgsi: Add some operators for glsl_to_tgsi related classes

2017-12-12 Thread Gert Wollny
Add the equal operator and the "<<" stream write operator for the
st_*_reg classes and the "<<" operator to the instruction class, and
make use of these operators in the debugging output.

Signed-off-by: Gert Wollny 
---
 src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp | 168 +
 src/mesa/state_tracker/st_glsl_to_tgsi_private.h   |  19 +++
 .../state_tracker/st_glsl_to_tgsi_temprename.cpp   | 105 +++--
 3 files changed, 204 insertions(+), 88 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp
index b664fa7ec3..b098a20754 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp
@@ -26,6 +26,7 @@
 #include "st_glsl_to_tgsi_private.h"
 #include 
 #include 
+#include 
 
 static int swizzle_for_type(const glsl_type *type, int component = 0)
 {
@@ -179,6 +180,81 @@ st_src_reg st_src_reg::get_abs()
return reg;
 }
 
+bool operator == (const st_src_reg& lhs, const st_src_reg& rhs)
+{
+   bool result;
+
+   if (lhs.type != rhs.type ||
+   lhs.file != rhs.file ||
+   lhs.index != rhs.index ||
+   lhs.swizzle != rhs.swizzle ||
+   lhs.index2D != rhs.index2D ||
+   lhs.has_index2 != rhs.has_index2 ||
+   lhs.array_id != rhs.array_id ||
+   lhs.negate != rhs.negate ||
+   lhs.abs != rhs.abs ||
+   lhs.double_reg2 != rhs.double_reg2 ||
+   lhs.is_double_vertex_input != rhs.is_double_vertex_input)
+  return false;
+
+
+   if (lhs.reladdr) {
+  if (!rhs.reladdr)
+ return false;
+  result = (*lhs.reladdr == *rhs.reladdr);
+   } else {
+  result = !rhs.reladdr;
+   }
+
+   if (lhs.reladdr2) {
+  if (!rhs.reladdr2)
+ return false;
+  result &= (*lhs.reladdr2 == *rhs.reladdr2);
+   } else {
+  result &= !rhs.reladdr2;
+   }
+   return result;
+}
+
+static const char swz_txt[] = "xyzw";
+
+std::ostream& operator << (std::ostream& os, const st_src_reg& reg)
+{
+
+   if (reg.negate)
+  os << "-";
+   if (reg.abs)
+  os << "|";
+
+   os << _mesa_register_file_name(reg.file);
+
+   if (reg.file == PROGRAM_ARRAY) {
+  os << "(" << reg.array_id << ")";
+   }
+   if (reg.has_index2) {
+  os << "[";
+  if (reg.reladdr2) {
+ os << *reg.reladdr2;
+  }
+  os << "+" << reg.index2D << "]";
+   }
+   os << "[";
+   if (reg.reladdr) {
+  os << *reg.reladdr;
+   }
+   os << reg.index << "].";
+   for (int i = 0; i < 4; ++i) {
+  int swz = GET_SWZ(reg.swizzle, i);
+  if (swz < 4)
+ os << swz_txt[swz];
+  else
+ os << "_";
+   }
+   if (reg.abs)
+  os << "|";
+   return os;
+}
+
 st_dst_reg::st_dst_reg(st_src_reg reg)
 {
this->type = reg.type;
@@ -250,3 +326,95 @@ void st_dst_reg::operator=(const st_dst_reg ®)
this->has_index2 = reg.has_index2;
this->array_id = reg.array_id;
 }
+
+bool operator == (const st_dst_reg& lhs, const st_dst_reg& rhs)
+{
+   bool result;
+
+   if (lhs.type != rhs.type ||
+   lhs.file != rhs.file ||
+   lhs.index != rhs.index ||
+   lhs.writemask != rhs.writemask ||
+   lhs.index2D != rhs.index2D ||
+   lhs.has_index2 != rhs.has_index2 ||
+   lhs.array_id != rhs.array_id)
+  return false;
+
+
+   if (lhs.reladdr) {
+  if (!rhs.reladdr)
+ return false;
+  result = (*lhs.reladdr == *rhs.reladdr);
+   } else {
+  result = !rhs.reladdr;
+   }
+
+   if (lhs.reladdr2) {
+  if (!rhs.reladdr2)
+ return false;
+  result &= (*lhs.reladdr2 == *rhs.reladdr2);
+   } else {
+  result &= !rhs.reladdr2;
+   }
+   return result;
+}
+
+std::ostream& operator << (std::ostream& os, const st_dst_reg& reg)
+{
+
+   os << _mesa_register_file_name(reg.file);
+   if (reg.file == PROGRAM_ARRAY) {
+  os << "(" << reg.array_id << ")";
+   }
+   if (reg.has_index2) {
+  os << "[";
+  if (reg.reladdr2) {
+ os << *reg.reladdr2;
+  }
+  os << "+" << reg.index2D << "]";
+   }
+   os << "[";
+   if (reg.reladdr) {
+  os << *reg.reladdr;
+   }
+   os << reg.index << "].";
+   for (int i = 0; i < 4; ++i) {
+  if (1 << i & reg.writemask)
+ os << swz_txt[i];
+  else
+ os << "_";
+   }
+
+   return os;
+}
+
+void glsl_to_tgsi_instruction::print(std::ostream& os) const
+{
+   os << tgsi_get_opcode_name(info->opcode) << " ";
+
+   bool has_operators = false;
+   for (unsigned j = 0; j < num_inst_dst_regs(this); j++) {
+  has_operators = true;
+  if (j > 0)
+ os << ", ";
+  os << dst[j];
+   }
+
+   if (has_operators)
+  os << " := ";
+
+   for (unsigned j = 0; j < num_inst_src_regs(this); j++) {
+  if (j > 0)
+ os << ", ";
+  os << src[j];
+   }
+
+   if (tex_offset_num_offset > 0) {
+  os << ", TEXOFS: ";
+  for (unsigned j = 0; j < tex_offset_num_offset; j++) {
+ if (j > 0)
+os << ", ";
+ os << tex_offsets[j];
+  }
+   }

[Mesa-dev] [PATCH v3 00/10] glsl_to_tgsi: Further improvement of lifetime tracking for register merge

2017-12-12 Thread Gert Wollny
Dear all,

this is another update to the patch set that adds enhanced tracking of IF/ELSE
branches and tracking of reladdr* registers for the register_merge step.

It cleans up the debugging output and moves the testing classes into a 
convenience 
library. The latter is in preparation for another series that will deal with 
array 
merging. 

The series shows no piglit regressions and I use it already for quite some 
time. 

So far patches 3 (was 1 in v3) and 8 (was 5 in v1)

  Reviewed-by: Nicolai Hähnle 

Changes w.r.t. v3:

* Add patch 1: mesa/program: Add missing file types to _mesa_register_file_name
* Add patch 2: Add stream and equal operators and rewrite the debug output to
  use the stream operator and _mesa_register_file_name instead of some
  hand-baked code, and squash former patch 5 into this.
* Move the Mock* classes and shared test code into a convenience
  library, and rename Mock* to Fake* since they are not really Mocks.

Changes w.r.t. v2:

* patch 9: make the creation of register description tuples explicit because 
  this is what in c++11 is actually required (This slipped before because it 
  seems that g++-7.2 handles tuple initialization like it was c++17, also with
  its default setting -std=c++14). 

v1: 

* patches 2-4 (new): As suggested by Nicolai, these patches unify the test 
classes 
  with respect to the different register inputs (at this point: plain and with 
  swizzle). In addition, some comments are corrected and the use of white 
spaces 
  in the test cases is made more consistent. 
* patch 5: correct the debug output for indirect addressing. Nicolai suggested 
that 
  another patch might be in order to properly propagate the information when 
and 
  which address register is used, but since st_*_reg is passed through various 
  levels by value, I'd prefer to deal with that in another, dedicated patch 
series.
* patch 6: Further improve the tracking algorithm, and, as requested by 
Nicolai, 
  rename some variables and add comments to make the algorithm clearer.
* patch 7: Add yet more tests. 
* patch 9: Update the tests to adhere to the new, unified interface. 
* patch 10 (new): remove the no longer needed assert for the use of address 
registers 
  in register_merge (I was considering to add this to 8, but since that one was 
already 
  reviewed ...)

many thanks for any comments, 
Gert

PS: I have no write access to mesa-git. 

Gert Wollny (10):
  mesa/st/tests: Fix zero-byte allocation leaks
  mesa/st/tests: unify MockCodeLine* classes
  mesa/st/tests: base check of number of registers on opcode info
  mesa/st/tests: cleanup whitespace usage and correct some comments
  mesa/st/glsl_to_tgsi: Correct debug output for indirect access
  mesa/st/glsl_to_tgsi: Add tracking of ifelse writes in register
merging
  mesa/st/tests: Add tests for improved tracking of temporaries
  mesa/st/glsl_to_tgsi: Add tracking of indirect addressing registers
  mesa/st/tests: Add tests for lifetime tracking with indirect
addressing
  mesa/st/glsl_to_tgsi: remove now unneeded assert.

 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |1 -
 .../state_tracker/st_glsl_to_tgsi_temprename.cpp   |  540 +++--
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 1278 +++-
 3 files changed, 1401 insertions(+), 418 deletions(-)

-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] i965: scratch space fixes (v2)

2017-12-12 Thread Eero Tamminen

Hi,

Tested-by: Eero Tamminen 

Fixes GPU hangs I'm seeing also with the *GL* version of CarChase on KBL 
GT3e (when using Ubuntu 16.04 kernel).


- Eero

On 12.12.2017 14:17, kevin.rogo...@intel.com wrote:

From: Kevin Rogovin 

This patch series offers a readability improvement for programming
MEDIA_VFE_STATE and fixes a scratch space sizing bug for Gen9.
Together with the ASTC5x5 fixes posted before, carchase on GLES
works on my SKL GT4.

v2:
  correctly state that first patch is just readability patch
  correctly compute subslices as 4 times the number of slices

Kevin Rogovin (2):
   i965: Program MEDIA_VFE_STATE in a more readable fashion.
   i965: compute scratch space size correctly for Gen9

  src/mesa/drivers/dri/i965/brw_program.c   |  6 +-
  src/mesa/drivers/dri/i965/genX_state_upload.c | 19 +--
  2 files changed, 18 insertions(+), 7 deletions(-)



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] intel/fs: Implement GRF bank conflict mitigation pass.

2017-12-12 Thread Eero Tamminen

Hi,

On 11.12.2017 12:28, Eero Tamminen wrote:

Thanks for finally having this handled in Mesa!

This patch series, live intervals and "Don't let undefined values 
prevent copy propagation" commits help performance in following tests:

* GfxBench CarChase (2% by live intervals)


That was on SKL GT2.

On BXT J4205, it (or the whole set of commits) improved CarChase by 4-5%!



* GfxBench AztecRuins & Manhattan 3.0 (very marginally)
* GfxBench Tessellation & ALU (not ALU2)
* GpuTest Volplosion & Julia FP64 (maybe also FurMark)
* SynMark CSDof (2-3% by copy propagation)
* SynMark PSPom (1% by live intervals)

Most visible improvements are on (all) GEN9+ platforms, but several of 
them are visible also on earlier GENS.



Shader compilation speed (in SynMark DrvShComp) drops by ~10%, mainly 
from the copy propagation commit.



Live intervals commit may have introduced small rendering regression in 
DOTA2 (Vulkan version), I'll check that next.


I wasn't able to reproduce that with real games, so I assume it to be an 
issue with Vulkan trace / replay used for the render validation (trace 
replay showed a lot of barrier errors with Vulkan API validation).


-> Seems I need to update our DOTA2 (and other games) validation traces 
after updating vktrace/replay.  Hopefully new versions work better with 
latest Mesa code.



- Eero


On 06.12.2017 22:38, Francisco Jerez wrote:

This series (which is ready for production and improves the cycle count
of over 46k shaders) has been sitting here for nearly half a year.  I'm
planning to self-review it and land it (along with PATCH 3/2 I just sent
to make sure we keep regressions under control) if nobody else does in
the next two weeks.

Francisco Jerez  writes:


Unnecessary GRF bank conflicts increase the issue time of ternary
instructions (the overwhelmingly most common of which is MAD) by
roughly 50%, leading to reduced ALU throughput.  This pass attempts to
minimize the number of bank conflicts by rearranging the layout of the
GRF space post-register allocation.  It's in general not possible to
eliminate all of them without introducing extra copies, which are
typically more expensive than the bank conflict itself.

In a shader-db run on SKL this helps roughly 46k shaders:

    total conflicts in shared programs: 1008981 -> 600461 (-40.49%)
    conflicts in affected programs: 816222 -> 407702 (-50.05%)
    helped: 46234
    HURT: 72

The running time of shader-db itself on SKL seems to be increased by
roughly 2.52%±1.13% with n=20 due to the additional work done by the
compiler back-end.

On earlier generations the pass is somewhat less effective in relative
terms because the hardware incurs a bank conflict anytime the last two
sources of the instruction are duplicate (e.g. while trying to square
a value using MAD), which is impossible to avoid without introducing
copies.  E.g. for a shader-db run on SNB:

    total conflicts in shared programs: 944636 -> 623185 (-34.03%)
    conflicts in affected programs: 853258 -> 531807 (-37.67%)
    helped: 31052
    HURT: 19

And on BDW:

    total conflicts in shared programs: 1418393 -> 987539 (-30.38%)
    conflicts in affected programs: 1179787 -> 748933 (-36.52%)
    helped: 47592
    HURT: 70

On SKL GT4e this improves performance of GpuTest Volplosion by 3.64%
±0.33% with n=16.

NOTE: This patch intentionally disregards some i965 coding conventions
   for the sake of reviewability.  This is addressed by the next
   squash patch which introduces an amount of (for the most part
   boring) boilerplate that might distract reviewers from the
   non-trivial algorithmic details of the pass.
---
  src/intel/Makefile.sources   |   1 +
  src/intel/compiler/brw_fs.cpp    |   2 +
  src/intel/compiler/brw_fs.h  |   1 +
  src/intel/compiler/brw_fs_bank_conflicts.cpp | 791 
+++

  4 files changed, 795 insertions(+)
  create mode 100644 src/intel/compiler/brw_fs_bank_conflicts.cpp

diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index a877ff2..1b9799a 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -44,6 +44,7 @@ COMPILER_FILES = \
  compiler/brw_eu_util.c \
  compiler/brw_eu_validate.c \
  compiler/brw_fs_builder.h \
+    compiler/brw_fs_bank_conflicts.cpp \
  compiler/brw_fs_cmod_propagation.cpp \
  compiler/brw_fs_combine_constants.cpp \
  compiler/brw_fs_copy_propagation.cpp \
diff --git a/src/intel/compiler/brw_fs.cpp 
b/src/intel/compiler/brw_fs.cpp

index 43b6e34..0a85c0c 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5858,6 +5858,8 @@ fs_visitor::allocate_registers(bool 
allow_spilling)

 if (failed)
    return;
+   opt_bank_conflicts();
+
 schedule_instructions(SCHEDULE_POST);
 if (last_scratch > 0) {
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 6c8c027..b1fc7b3 100644
--- 

[Mesa-dev] [PATCH 2/2] i965: compute scratch space size correctly for Gen9

2017-12-12 Thread kevin . rogovin
From: Kevin Rogovin 

Signed-off-by: Kevin Rogovin 
---
 src/mesa/drivers/dri/i965/brw_program.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 6aa4100..1ae0aa0 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -368,9 +368,13 @@ brw_alloc_stage_scratch(struct brw_context *brw,
*
* According to the other driver team, this applies to compute shaders
* as well.  This is not currently documented at all.
+   *
+   * brw->screen->subslice_total is the TOTAL number of subslices
+   * and we wish to view that there are 4 subslices per slice
+   * instead of the actual number of subslices per slice.
*/
   if (devinfo->gen >= 9)
- subslices = 4;
+ subslices = 4 * brw->screen->devinfo.num_slices;
 
   /* WaCSScratchSize:hsw
*
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >